| { |
| "best_metric": 0.9125, |
| "best_model_checkpoint": "mvit_v2_s_Kinetics400_transf_c_rwf2000/checkpoint-570", |
| "epoch": 5.1, |
| "eval_steps": 500, |
| "global_step": 1140, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005263157894736842, |
| "grad_norm": 27.18143653869629, |
| "learning_rate": 9.947368421052632e-06, |
| "loss": 6.767, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.010526315789473684, |
| "grad_norm": 28.846113204956055, |
| "learning_rate": 9.894736842105264e-06, |
| "loss": 5.9538, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.015789473684210527, |
| "grad_norm": 27.071590423583984, |
| "learning_rate": 9.842105263157896e-06, |
| "loss": 5.1204, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.021052631578947368, |
| "grad_norm": 43.807559967041016, |
| "learning_rate": 9.789473684210527e-06, |
| "loss": 4.4247, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.02631578947368421, |
| "grad_norm": 29.54512596130371, |
| "learning_rate": 9.736842105263159e-06, |
| "loss": 3.7804, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.031578947368421054, |
| "grad_norm": 24.903718948364258, |
| "learning_rate": 9.68421052631579e-06, |
| "loss": 3.0124, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03684210526315789, |
| "grad_norm": 28.422218322753906, |
| "learning_rate": 9.631578947368422e-06, |
| "loss": 2.417, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.042105263157894736, |
| "grad_norm": 16.815492630004883, |
| "learning_rate": 9.578947368421054e-06, |
| "loss": 1.2391, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.04736842105263158, |
| "grad_norm": 31.41044044494629, |
| "learning_rate": 9.526315789473684e-06, |
| "loss": 1.3617, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.05263157894736842, |
| "grad_norm": 22.144113540649414, |
| "learning_rate": 9.473684210526315e-06, |
| "loss": 1.2047, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05789473684210526, |
| "grad_norm": 12.424088478088379, |
| "learning_rate": 9.421052631578949e-06, |
| "loss": 0.7313, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.06315789473684211, |
| "grad_norm": 35.89997100830078, |
| "learning_rate": 9.36842105263158e-06, |
| "loss": 0.5971, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.06842105263157895, |
| "grad_norm": 10.456808090209961, |
| "learning_rate": 9.315789473684212e-06, |
| "loss": 0.61, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.07368421052631578, |
| "grad_norm": 27.438720703125, |
| "learning_rate": 9.263157894736842e-06, |
| "loss": 0.77, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.07894736842105263, |
| "grad_norm": 10.961087226867676, |
| "learning_rate": 9.210526315789474e-06, |
| "loss": 0.6213, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.08421052631578947, |
| "grad_norm": 20.819063186645508, |
| "learning_rate": 9.157894736842105e-06, |
| "loss": 0.6471, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.08947368421052632, |
| "grad_norm": 18.45960235595703, |
| "learning_rate": 9.105263157894739e-06, |
| "loss": 0.5697, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.09473684210526316, |
| "grad_norm": 10.544190406799316, |
| "learning_rate": 9.05263157894737e-06, |
| "loss": 0.6217, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 4.165653705596924, |
| "learning_rate": 9e-06, |
| "loss": 0.4345, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1, |
| "eval_accuracy": 0.725, |
| "eval_f1": 0.7046484309447895, |
| "eval_loss": 0.6745138764381409, |
| "eval_precision": 0.8106125970664365, |
| "eval_runtime": 72.1409, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.277, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.0052631578947369, |
| "grad_norm": 20.22187042236328, |
| "learning_rate": 8.947368421052632e-06, |
| "loss": 0.7393, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0105263157894737, |
| "grad_norm": 11.406439781188965, |
| "learning_rate": 8.894736842105264e-06, |
| "loss": 0.431, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.0157894736842106, |
| "grad_norm": 15.169920921325684, |
| "learning_rate": 8.842105263157895e-06, |
| "loss": 0.5438, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.0210526315789474, |
| "grad_norm": 11.786495208740234, |
| "learning_rate": 8.789473684210527e-06, |
| "loss": 0.831, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.0263157894736843, |
| "grad_norm": 14.316608428955078, |
| "learning_rate": 8.736842105263158e-06, |
| "loss": 0.6173, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.0315789473684212, |
| "grad_norm": 22.38058090209961, |
| "learning_rate": 8.68421052631579e-06, |
| "loss": 0.5205, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.0368421052631578, |
| "grad_norm": 6.769970417022705, |
| "learning_rate": 8.631578947368422e-06, |
| "loss": 0.5761, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.0421052631578946, |
| "grad_norm": 23.546852111816406, |
| "learning_rate": 8.578947368421053e-06, |
| "loss": 0.4213, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.0473684210526315, |
| "grad_norm": 13.848394393920898, |
| "learning_rate": 8.526315789473685e-06, |
| "loss": 0.4926, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.0526315789473684, |
| "grad_norm": 6.40794563293457, |
| "learning_rate": 8.473684210526317e-06, |
| "loss": 0.3957, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.0578947368421052, |
| "grad_norm": 6.262490749359131, |
| "learning_rate": 8.421052631578948e-06, |
| "loss": 0.3729, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.063157894736842, |
| "grad_norm": 9.696098327636719, |
| "learning_rate": 8.36842105263158e-06, |
| "loss": 0.4236, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.068421052631579, |
| "grad_norm": 10.287137985229492, |
| "learning_rate": 8.315789473684212e-06, |
| "loss": 0.4084, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.0736842105263158, |
| "grad_norm": 23.936748504638672, |
| "learning_rate": 8.263157894736843e-06, |
| "loss": 0.3948, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.0789473684210527, |
| "grad_norm": 19.638160705566406, |
| "learning_rate": 8.210526315789475e-06, |
| "loss": 0.5494, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.0842105263157895, |
| "grad_norm": 7.03140926361084, |
| "learning_rate": 8.157894736842106e-06, |
| "loss": 0.5816, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.0894736842105264, |
| "grad_norm": 19.78676414489746, |
| "learning_rate": 8.105263157894736e-06, |
| "loss": 0.7327, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.0947368421052632, |
| "grad_norm": 24.11279296875, |
| "learning_rate": 8.052631578947368e-06, |
| "loss": 0.392, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 9.393863677978516, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.5054, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.1, |
| "eval_accuracy": 0.85625, |
| "eval_f1": 0.8537302754481498, |
| "eval_loss": 0.3967539668083191, |
| "eval_precision": 0.8826145326397047, |
| "eval_runtime": 73.1162, |
| "eval_samples_per_second": 2.188, |
| "eval_steps_per_second": 0.274, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.0052631578947366, |
| "grad_norm": 6.710114479064941, |
| "learning_rate": 7.947368421052633e-06, |
| "loss": 0.3862, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.0105263157894737, |
| "grad_norm": 11.278571128845215, |
| "learning_rate": 7.894736842105265e-06, |
| "loss": 0.3496, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.0157894736842104, |
| "grad_norm": 11.26418685913086, |
| "learning_rate": 7.842105263157895e-06, |
| "loss": 0.4601, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.0210526315789474, |
| "grad_norm": 10.958210945129395, |
| "learning_rate": 7.789473684210526e-06, |
| "loss": 0.4051, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.026315789473684, |
| "grad_norm": 2.237288475036621, |
| "learning_rate": 7.736842105263158e-06, |
| "loss": 0.3642, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.031578947368421, |
| "grad_norm": 8.63414192199707, |
| "learning_rate": 7.68421052631579e-06, |
| "loss": 0.5683, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.036842105263158, |
| "grad_norm": 13.12728500366211, |
| "learning_rate": 7.631578947368423e-06, |
| "loss": 0.3092, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.042105263157895, |
| "grad_norm": 1.1393234729766846, |
| "learning_rate": 7.578947368421054e-06, |
| "loss": 0.4077, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.0473684210526315, |
| "grad_norm": 1.78622567653656, |
| "learning_rate": 7.526315789473685e-06, |
| "loss": 0.3897, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.0526315789473686, |
| "grad_norm": 10.598691940307617, |
| "learning_rate": 7.473684210526316e-06, |
| "loss": 0.3625, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.057894736842105, |
| "grad_norm": 12.993045806884766, |
| "learning_rate": 7.421052631578948e-06, |
| "loss": 0.4397, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.0631578947368423, |
| "grad_norm": 25.146648406982422, |
| "learning_rate": 7.368421052631579e-06, |
| "loss": 0.5304, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.068421052631579, |
| "grad_norm": 12.983936309814453, |
| "learning_rate": 7.315789473684212e-06, |
| "loss": 0.4668, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.0736842105263156, |
| "grad_norm": 9.129647254943848, |
| "learning_rate": 7.263157894736843e-06, |
| "loss": 0.4969, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.0789473684210527, |
| "grad_norm": 8.563615798950195, |
| "learning_rate": 7.210526315789474e-06, |
| "loss": 0.4993, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.0842105263157893, |
| "grad_norm": 7.202339172363281, |
| "learning_rate": 7.157894736842106e-06, |
| "loss": 0.3251, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.0894736842105264, |
| "grad_norm": 4.147830963134766, |
| "learning_rate": 7.1052631578947375e-06, |
| "loss": 0.4315, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.094736842105263, |
| "grad_norm": 7.6200270652771, |
| "learning_rate": 7.052631578947369e-06, |
| "loss": 0.3396, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.1, |
| "grad_norm": 13.409224510192871, |
| "learning_rate": 7e-06, |
| "loss": 0.2138, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.1, |
| "eval_accuracy": 0.9125, |
| "eval_f1": 0.9122807017543861, |
| "eval_loss": 0.28845420479774475, |
| "eval_precision": 0.9166666666666667, |
| "eval_runtime": 71.7969, |
| "eval_samples_per_second": 2.229, |
| "eval_steps_per_second": 0.279, |
| "step": 570 |
| }, |
| { |
| "epoch": 3.0052631578947366, |
| "grad_norm": 3.221128463745117, |
| "learning_rate": 6.947368421052632e-06, |
| "loss": 0.3898, |
| "step": 580 |
| }, |
| { |
| "epoch": 3.0105263157894737, |
| "grad_norm": 2.5701775550842285, |
| "learning_rate": 6.894736842105264e-06, |
| "loss": 0.3812, |
| "step": 590 |
| }, |
| { |
| "epoch": 3.0157894736842104, |
| "grad_norm": 1.1917780637741089, |
| "learning_rate": 6.842105263157896e-06, |
| "loss": 0.3651, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.0210526315789474, |
| "grad_norm": 14.28803825378418, |
| "learning_rate": 6.789473684210527e-06, |
| "loss": 0.398, |
| "step": 610 |
| }, |
| { |
| "epoch": 3.026315789473684, |
| "grad_norm": 11.742090225219727, |
| "learning_rate": 6.736842105263158e-06, |
| "loss": 0.3914, |
| "step": 620 |
| }, |
| { |
| "epoch": 3.031578947368421, |
| "grad_norm": 7.811966419219971, |
| "learning_rate": 6.68421052631579e-06, |
| "loss": 0.232, |
| "step": 630 |
| }, |
| { |
| "epoch": 3.036842105263158, |
| "grad_norm": 17.144683837890625, |
| "learning_rate": 6.631578947368421e-06, |
| "loss": 0.3637, |
| "step": 640 |
| }, |
| { |
| "epoch": 3.042105263157895, |
| "grad_norm": 19.564409255981445, |
| "learning_rate": 6.578947368421054e-06, |
| "loss": 0.5103, |
| "step": 650 |
| }, |
| { |
| "epoch": 3.0473684210526315, |
| "grad_norm": 8.233375549316406, |
| "learning_rate": 6.526315789473685e-06, |
| "loss": 0.3698, |
| "step": 660 |
| }, |
| { |
| "epoch": 3.0526315789473686, |
| "grad_norm": 18.26775360107422, |
| "learning_rate": 6.473684210526316e-06, |
| "loss": 0.3242, |
| "step": 670 |
| }, |
| { |
| "epoch": 3.057894736842105, |
| "grad_norm": 15.07589340209961, |
| "learning_rate": 6.421052631578948e-06, |
| "loss": 0.338, |
| "step": 680 |
| }, |
| { |
| "epoch": 3.0631578947368423, |
| "grad_norm": 3.9075160026550293, |
| "learning_rate": 6.3684210526315795e-06, |
| "loss": 0.3227, |
| "step": 690 |
| }, |
| { |
| "epoch": 3.068421052631579, |
| "grad_norm": 2.0749523639678955, |
| "learning_rate": 6.31578947368421e-06, |
| "loss": 0.4136, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.0736842105263156, |
| "grad_norm": 15.651928901672363, |
| "learning_rate": 6.263157894736842e-06, |
| "loss": 0.4005, |
| "step": 710 |
| }, |
| { |
| "epoch": 3.0789473684210527, |
| "grad_norm": 22.100845336914062, |
| "learning_rate": 6.2105263157894745e-06, |
| "loss": 0.4285, |
| "step": 720 |
| }, |
| { |
| "epoch": 3.0842105263157893, |
| "grad_norm": 2.6055493354797363, |
| "learning_rate": 6.157894736842106e-06, |
| "loss": 0.2041, |
| "step": 730 |
| }, |
| { |
| "epoch": 3.0894736842105264, |
| "grad_norm": 13.344124794006348, |
| "learning_rate": 6.105263157894738e-06, |
| "loss": 0.3862, |
| "step": 740 |
| }, |
| { |
| "epoch": 3.094736842105263, |
| "grad_norm": 0.9060586094856262, |
| "learning_rate": 6.0526315789473685e-06, |
| "loss": 0.388, |
| "step": 750 |
| }, |
| { |
| "epoch": 3.1, |
| "grad_norm": 14.1149320602417, |
| "learning_rate": 6e-06, |
| "loss": 0.4265, |
| "step": 760 |
| }, |
| { |
| "epoch": 3.1, |
| "eval_accuracy": 0.9125, |
| "eval_f1": 0.9125, |
| "eval_loss": 0.25910043716430664, |
| "eval_precision": 0.9125, |
| "eval_runtime": 72.0159, |
| "eval_samples_per_second": 2.222, |
| "eval_steps_per_second": 0.278, |
| "step": 760 |
| }, |
| { |
| "epoch": 4.005263157894737, |
| "grad_norm": 8.575451850891113, |
| "learning_rate": 5.947368421052632e-06, |
| "loss": 0.4956, |
| "step": 770 |
| }, |
| { |
| "epoch": 4.010526315789473, |
| "grad_norm": 9.764039993286133, |
| "learning_rate": 5.8947368421052634e-06, |
| "loss": 0.3357, |
| "step": 780 |
| }, |
| { |
| "epoch": 4.015789473684211, |
| "grad_norm": 8.81169605255127, |
| "learning_rate": 5.842105263157896e-06, |
| "loss": 0.2737, |
| "step": 790 |
| }, |
| { |
| "epoch": 4.021052631578947, |
| "grad_norm": 1.05990469455719, |
| "learning_rate": 5.789473684210527e-06, |
| "loss": 0.3039, |
| "step": 800 |
| }, |
| { |
| "epoch": 4.026315789473684, |
| "grad_norm": 19.7120418548584, |
| "learning_rate": 5.736842105263158e-06, |
| "loss": 0.2567, |
| "step": 810 |
| }, |
| { |
| "epoch": 4.031578947368421, |
| "grad_norm": 3.668038845062256, |
| "learning_rate": 5.68421052631579e-06, |
| "loss": 0.3245, |
| "step": 820 |
| }, |
| { |
| "epoch": 4.036842105263158, |
| "grad_norm": 17.96670913696289, |
| "learning_rate": 5.631578947368422e-06, |
| "loss": 0.2581, |
| "step": 830 |
| }, |
| { |
| "epoch": 4.042105263157895, |
| "grad_norm": 31.538679122924805, |
| "learning_rate": 5.578947368421052e-06, |
| "loss": 0.4507, |
| "step": 840 |
| }, |
| { |
| "epoch": 4.0473684210526315, |
| "grad_norm": 14.32706356048584, |
| "learning_rate": 5.526315789473685e-06, |
| "loss": 0.2924, |
| "step": 850 |
| }, |
| { |
| "epoch": 4.052631578947368, |
| "grad_norm": 10.178019523620605, |
| "learning_rate": 5.4736842105263165e-06, |
| "loss": 0.3634, |
| "step": 860 |
| }, |
| { |
| "epoch": 4.057894736842106, |
| "grad_norm": 20.643692016601562, |
| "learning_rate": 5.421052631578948e-06, |
| "loss": 0.3867, |
| "step": 870 |
| }, |
| { |
| "epoch": 4.063157894736842, |
| "grad_norm": 5.015390396118164, |
| "learning_rate": 5.36842105263158e-06, |
| "loss": 0.1877, |
| "step": 880 |
| }, |
| { |
| "epoch": 4.068421052631579, |
| "grad_norm": 6.897916793823242, |
| "learning_rate": 5.315789473684211e-06, |
| "loss": 0.3742, |
| "step": 890 |
| }, |
| { |
| "epoch": 4.073684210526316, |
| "grad_norm": 16.30086326599121, |
| "learning_rate": 5.263157894736842e-06, |
| "loss": 0.5215, |
| "step": 900 |
| }, |
| { |
| "epoch": 4.078947368421052, |
| "grad_norm": 33.295650482177734, |
| "learning_rate": 5.210526315789474e-06, |
| "loss": 0.4182, |
| "step": 910 |
| }, |
| { |
| "epoch": 4.08421052631579, |
| "grad_norm": 2.538074493408203, |
| "learning_rate": 5.157894736842106e-06, |
| "loss": 0.2224, |
| "step": 920 |
| }, |
| { |
| "epoch": 4.089473684210526, |
| "grad_norm": 1.320330262184143, |
| "learning_rate": 5.105263157894738e-06, |
| "loss": 0.2025, |
| "step": 930 |
| }, |
| { |
| "epoch": 4.094736842105263, |
| "grad_norm": 10.492331504821777, |
| "learning_rate": 5.052631578947369e-06, |
| "loss": 0.251, |
| "step": 940 |
| }, |
| { |
| "epoch": 4.1, |
| "grad_norm": 12.127493858337402, |
| "learning_rate": 5e-06, |
| "loss": 0.4049, |
| "step": 950 |
| }, |
| { |
| "epoch": 4.1, |
| "eval_accuracy": 0.8875, |
| "eval_f1": 0.8863636363636364, |
| "eval_loss": 0.3837929666042328, |
| "eval_precision": 0.9036458333333333, |
| "eval_runtime": 71.9918, |
| "eval_samples_per_second": 2.222, |
| "eval_steps_per_second": 0.278, |
| "step": 950 |
| }, |
| { |
| "epoch": 5.005263157894737, |
| "grad_norm": 5.86998987197876, |
| "learning_rate": 4.947368421052632e-06, |
| "loss": 0.4153, |
| "step": 960 |
| }, |
| { |
| "epoch": 5.010526315789473, |
| "grad_norm": 7.648462295532227, |
| "learning_rate": 4.894736842105264e-06, |
| "loss": 0.2841, |
| "step": 970 |
| }, |
| { |
| "epoch": 5.015789473684211, |
| "grad_norm": 3.2707762718200684, |
| "learning_rate": 4.842105263157895e-06, |
| "loss": 0.131, |
| "step": 980 |
| }, |
| { |
| "epoch": 5.021052631578947, |
| "grad_norm": 25.58846664428711, |
| "learning_rate": 4.789473684210527e-06, |
| "loss": 0.4913, |
| "step": 990 |
| }, |
| { |
| "epoch": 5.026315789473684, |
| "grad_norm": 13.411537170410156, |
| "learning_rate": 4.736842105263158e-06, |
| "loss": 0.2683, |
| "step": 1000 |
| }, |
| { |
| "epoch": 5.031578947368421, |
| "grad_norm": 17.36676597595215, |
| "learning_rate": 4.68421052631579e-06, |
| "loss": 0.5226, |
| "step": 1010 |
| }, |
| { |
| "epoch": 5.036842105263158, |
| "grad_norm": 7.734870433807373, |
| "learning_rate": 4.631578947368421e-06, |
| "loss": 0.1967, |
| "step": 1020 |
| }, |
| { |
| "epoch": 5.042105263157895, |
| "grad_norm": 5.902388095855713, |
| "learning_rate": 4.578947368421053e-06, |
| "loss": 0.3835, |
| "step": 1030 |
| }, |
| { |
| "epoch": 5.0473684210526315, |
| "grad_norm": 14.54670238494873, |
| "learning_rate": 4.526315789473685e-06, |
| "loss": 0.2073, |
| "step": 1040 |
| }, |
| { |
| "epoch": 5.052631578947368, |
| "grad_norm": 23.170896530151367, |
| "learning_rate": 4.473684210526316e-06, |
| "loss": 0.2461, |
| "step": 1050 |
| }, |
| { |
| "epoch": 5.057894736842106, |
| "grad_norm": 15.245760917663574, |
| "learning_rate": 4.4210526315789476e-06, |
| "loss": 0.5014, |
| "step": 1060 |
| }, |
| { |
| "epoch": 5.063157894736842, |
| "grad_norm": 2.622382164001465, |
| "learning_rate": 4.368421052631579e-06, |
| "loss": 0.3083, |
| "step": 1070 |
| }, |
| { |
| "epoch": 5.068421052631579, |
| "grad_norm": 14.459348678588867, |
| "learning_rate": 4.315789473684211e-06, |
| "loss": 0.2886, |
| "step": 1080 |
| }, |
| { |
| "epoch": 5.073684210526316, |
| "grad_norm": 13.719648361206055, |
| "learning_rate": 4.2631578947368425e-06, |
| "loss": 0.3146, |
| "step": 1090 |
| }, |
| { |
| "epoch": 5.078947368421052, |
| "grad_norm": 14.046276092529297, |
| "learning_rate": 4.210526315789474e-06, |
| "loss": 0.4636, |
| "step": 1100 |
| }, |
| { |
| "epoch": 5.08421052631579, |
| "grad_norm": 1.9393348693847656, |
| "learning_rate": 4.157894736842106e-06, |
| "loss": 0.264, |
| "step": 1110 |
| }, |
| { |
| "epoch": 5.089473684210526, |
| "grad_norm": 18.79676628112793, |
| "learning_rate": 4.105263157894737e-06, |
| "loss": 0.3153, |
| "step": 1120 |
| }, |
| { |
| "epoch": 5.094736842105263, |
| "grad_norm": 28.437938690185547, |
| "learning_rate": 4.052631578947368e-06, |
| "loss": 0.4312, |
| "step": 1130 |
| }, |
| { |
| "epoch": 5.1, |
| "grad_norm": 21.330198287963867, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.3757, |
| "step": 1140 |
| }, |
| { |
| "epoch": 5.1, |
| "eval_accuracy": 0.875, |
| "eval_f1": 0.8733976895078335, |
| "eval_loss": 0.4311452805995941, |
| "eval_precision": 0.8949967083607637, |
| "eval_runtime": 71.9264, |
| "eval_samples_per_second": 2.224, |
| "eval_steps_per_second": 0.278, |
| "step": 1140 |
| }, |
| { |
| "epoch": 5.1, |
| "step": 1140, |
| "total_flos": 0.0, |
| "train_loss": 0.6870630094879552, |
| "train_runtime": 5648.6059, |
| "train_samples_per_second": 2.691, |
| "train_steps_per_second": 0.336 |
| }, |
| { |
| "epoch": 5.1, |
| "eval_accuracy": 0.88625, |
| "eval_f1": 0.886120283885864, |
| "eval_loss": 0.2998928725719452, |
| "eval_precision": 0.8880179065868865, |
| "eval_runtime": 375.1017, |
| "eval_samples_per_second": 2.133, |
| "eval_steps_per_second": 0.267, |
| "step": 1140 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1900, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.005 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|