| { |
| "best_metric": 0.84375, |
| "best_model_checkpoint": "mvit_v1_b_Kinetics400_transf_c_rwf2000/checkpoint-760", |
| "epoch": 6.1, |
| "eval_steps": 500, |
| "global_step": 1330, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005263157894736842, |
| "grad_norm": 20.39082908630371, |
| "learning_rate": 9.947368421052632e-06, |
| "loss": 6.5828, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.010526315789473684, |
| "grad_norm": 30.1745548248291, |
| "learning_rate": 9.894736842105264e-06, |
| "loss": 6.3269, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.015789473684210527, |
| "grad_norm": 21.304548263549805, |
| "learning_rate": 9.842105263157896e-06, |
| "loss": 5.5401, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.021052631578947368, |
| "grad_norm": 34.847957611083984, |
| "learning_rate": 9.789473684210527e-06, |
| "loss": 5.1716, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.02631578947368421, |
| "grad_norm": 29.04662322998047, |
| "learning_rate": 9.736842105263159e-06, |
| "loss": 4.3721, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.031578947368421054, |
| "grad_norm": 31.47679901123047, |
| "learning_rate": 9.68421052631579e-06, |
| "loss": 4.0561, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03684210526315789, |
| "grad_norm": 30.9488468170166, |
| "learning_rate": 9.631578947368422e-06, |
| "loss": 3.3662, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.042105263157894736, |
| "grad_norm": 23.594987869262695, |
| "learning_rate": 9.578947368421054e-06, |
| "loss": 1.9555, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.04736842105263158, |
| "grad_norm": 29.11677360534668, |
| "learning_rate": 9.526315789473684e-06, |
| "loss": 1.8569, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.05263157894736842, |
| "grad_norm": 37.965518951416016, |
| "learning_rate": 9.473684210526315e-06, |
| "loss": 1.514, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05789473684210526, |
| "grad_norm": 13.468791961669922, |
| "learning_rate": 9.421052631578949e-06, |
| "loss": 0.9838, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.06315789473684211, |
| "grad_norm": 30.70915412902832, |
| "learning_rate": 9.36842105263158e-06, |
| "loss": 0.7295, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.06842105263157895, |
| "grad_norm": 13.8804292678833, |
| "learning_rate": 9.315789473684212e-06, |
| "loss": 0.6841, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.07368421052631578, |
| "grad_norm": 17.479394912719727, |
| "learning_rate": 9.263157894736842e-06, |
| "loss": 0.9459, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.07894736842105263, |
| "grad_norm": 8.364581108093262, |
| "learning_rate": 9.210526315789474e-06, |
| "loss": 0.7621, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.08421052631578947, |
| "grad_norm": 15.02173137664795, |
| "learning_rate": 9.157894736842105e-06, |
| "loss": 0.6913, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.08947368421052632, |
| "grad_norm": 23.994060516357422, |
| "learning_rate": 9.105263157894739e-06, |
| "loss": 0.6554, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.09473684210526316, |
| "grad_norm": 13.736618041992188, |
| "learning_rate": 9.05263157894737e-06, |
| "loss": 0.746, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 7.4077558517456055, |
| "learning_rate": 9e-06, |
| "loss": 0.4837, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1, |
| "eval_accuracy": 0.66875, |
| "eval_f1": 0.6374674019922192, |
| "eval_loss": 0.9726358652114868, |
| "eval_precision": 0.7576950608446671, |
| "eval_runtime": 70.973, |
| "eval_samples_per_second": 2.254, |
| "eval_steps_per_second": 0.282, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.0052631578947369, |
| "grad_norm": 29.950836181640625, |
| "learning_rate": 8.947368421052632e-06, |
| "loss": 0.7972, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0105263157894737, |
| "grad_norm": 7.4079694747924805, |
| "learning_rate": 8.894736842105264e-06, |
| "loss": 0.5114, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.0157894736842106, |
| "grad_norm": 13.153223991394043, |
| "learning_rate": 8.842105263157895e-06, |
| "loss": 0.427, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.0210526315789474, |
| "grad_norm": 23.603761672973633, |
| "learning_rate": 8.789473684210527e-06, |
| "loss": 0.8155, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.0263157894736843, |
| "grad_norm": 12.597455024719238, |
| "learning_rate": 8.736842105263158e-06, |
| "loss": 0.7769, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.0315789473684212, |
| "grad_norm": 12.40769100189209, |
| "learning_rate": 8.68421052631579e-06, |
| "loss": 0.6717, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.0368421052631578, |
| "grad_norm": 7.567124366760254, |
| "learning_rate": 8.631578947368422e-06, |
| "loss": 0.5306, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.0421052631578946, |
| "grad_norm": 16.219642639160156, |
| "learning_rate": 8.578947368421053e-06, |
| "loss": 0.4013, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.0473684210526315, |
| "grad_norm": 15.891722679138184, |
| "learning_rate": 8.526315789473685e-06, |
| "loss": 0.473, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.0526315789473684, |
| "grad_norm": 12.381929397583008, |
| "learning_rate": 8.473684210526317e-06, |
| "loss": 0.5054, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.0578947368421052, |
| "grad_norm": 3.8112006187438965, |
| "learning_rate": 8.421052631578948e-06, |
| "loss": 0.4146, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.063157894736842, |
| "grad_norm": 12.637545585632324, |
| "learning_rate": 8.36842105263158e-06, |
| "loss": 0.483, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.068421052631579, |
| "grad_norm": 16.767824172973633, |
| "learning_rate": 8.315789473684212e-06, |
| "loss": 0.4881, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.0736842105263158, |
| "grad_norm": 20.965835571289062, |
| "learning_rate": 8.263157894736843e-06, |
| "loss": 0.4277, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.0789473684210527, |
| "grad_norm": 15.695181846618652, |
| "learning_rate": 8.210526315789475e-06, |
| "loss": 0.4985, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.0842105263157895, |
| "grad_norm": 5.402801513671875, |
| "learning_rate": 8.157894736842106e-06, |
| "loss": 0.6222, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.0894736842105264, |
| "grad_norm": 13.765841484069824, |
| "learning_rate": 8.105263157894736e-06, |
| "loss": 0.4847, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.0947368421052632, |
| "grad_norm": 12.656335830688477, |
| "learning_rate": 8.052631578947368e-06, |
| "loss": 0.4337, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 9.778788566589355, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.4771, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.1, |
| "eval_accuracy": 0.8375, |
| "eval_f1": 0.8365807668133248, |
| "eval_loss": 0.4597933888435364, |
| "eval_precision": 0.8452685421994885, |
| "eval_runtime": 70.3913, |
| "eval_samples_per_second": 2.273, |
| "eval_steps_per_second": 0.284, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.0052631578947366, |
| "grad_norm": 3.884115695953369, |
| "learning_rate": 7.947368421052633e-06, |
| "loss": 0.4156, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.0105263157894737, |
| "grad_norm": 13.59132194519043, |
| "learning_rate": 7.894736842105265e-06, |
| "loss": 0.4021, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.0157894736842104, |
| "grad_norm": 8.34617805480957, |
| "learning_rate": 7.842105263157895e-06, |
| "loss": 0.3354, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.0210526315789474, |
| "grad_norm": 8.853759765625, |
| "learning_rate": 7.789473684210526e-06, |
| "loss": 0.4284, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.026315789473684, |
| "grad_norm": 2.9599461555480957, |
| "learning_rate": 7.736842105263158e-06, |
| "loss": 0.399, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.031578947368421, |
| "grad_norm": 8.018070220947266, |
| "learning_rate": 7.68421052631579e-06, |
| "loss": 0.5831, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.036842105263158, |
| "grad_norm": 6.710073947906494, |
| "learning_rate": 7.631578947368423e-06, |
| "loss": 0.2981, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.042105263157895, |
| "grad_norm": 2.070084810256958, |
| "learning_rate": 7.578947368421054e-06, |
| "loss": 0.4386, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.0473684210526315, |
| "grad_norm": 5.736349105834961, |
| "learning_rate": 7.526315789473685e-06, |
| "loss": 0.3822, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.0526315789473686, |
| "grad_norm": 11.180822372436523, |
| "learning_rate": 7.473684210526316e-06, |
| "loss": 0.3862, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.057894736842105, |
| "grad_norm": 8.966133117675781, |
| "learning_rate": 7.421052631578948e-06, |
| "loss": 0.5257, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.0631578947368423, |
| "grad_norm": 14.565701484680176, |
| "learning_rate": 7.368421052631579e-06, |
| "loss": 0.3742, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.068421052631579, |
| "grad_norm": 10.042845726013184, |
| "learning_rate": 7.315789473684212e-06, |
| "loss": 0.3909, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.0736842105263156, |
| "grad_norm": 10.890286445617676, |
| "learning_rate": 7.263157894736843e-06, |
| "loss": 0.5469, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.0789473684210527, |
| "grad_norm": 5.286346435546875, |
| "learning_rate": 7.210526315789474e-06, |
| "loss": 0.5734, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.0842105263157893, |
| "grad_norm": 11.605768203735352, |
| "learning_rate": 7.157894736842106e-06, |
| "loss": 0.3376, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.0894736842105264, |
| "grad_norm": 15.367936134338379, |
| "learning_rate": 7.1052631578947375e-06, |
| "loss": 0.4938, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.094736842105263, |
| "grad_norm": 16.802827835083008, |
| "learning_rate": 7.052631578947369e-06, |
| "loss": 0.341, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.1, |
| "grad_norm": 17.05912208557129, |
| "learning_rate": 7e-06, |
| "loss": 0.2694, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.1, |
| "eval_accuracy": 0.83125, |
| "eval_f1": 0.8297536945812809, |
| "eval_loss": 0.46257010102272034, |
| "eval_precision": 0.84331983805668, |
| "eval_runtime": 69.7179, |
| "eval_samples_per_second": 2.295, |
| "eval_steps_per_second": 0.287, |
| "step": 570 |
| }, |
| { |
| "epoch": 3.0052631578947366, |
| "grad_norm": 3.188321113586426, |
| "learning_rate": 6.947368421052632e-06, |
| "loss": 0.3871, |
| "step": 580 |
| }, |
| { |
| "epoch": 3.0105263157894737, |
| "grad_norm": 2.308584451675415, |
| "learning_rate": 6.894736842105264e-06, |
| "loss": 0.4008, |
| "step": 590 |
| }, |
| { |
| "epoch": 3.0157894736842104, |
| "grad_norm": 5.290454387664795, |
| "learning_rate": 6.842105263157896e-06, |
| "loss": 0.4617, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.0210526315789474, |
| "grad_norm": 5.937681674957275, |
| "learning_rate": 6.789473684210527e-06, |
| "loss": 0.3444, |
| "step": 610 |
| }, |
| { |
| "epoch": 3.026315789473684, |
| "grad_norm": 4.868968963623047, |
| "learning_rate": 6.736842105263158e-06, |
| "loss": 0.3037, |
| "step": 620 |
| }, |
| { |
| "epoch": 3.031578947368421, |
| "grad_norm": 6.8114824295043945, |
| "learning_rate": 6.68421052631579e-06, |
| "loss": 0.2949, |
| "step": 630 |
| }, |
| { |
| "epoch": 3.036842105263158, |
| "grad_norm": 13.931032180786133, |
| "learning_rate": 6.631578947368421e-06, |
| "loss": 0.4389, |
| "step": 640 |
| }, |
| { |
| "epoch": 3.042105263157895, |
| "grad_norm": 18.7010555267334, |
| "learning_rate": 6.578947368421054e-06, |
| "loss": 0.4782, |
| "step": 650 |
| }, |
| { |
| "epoch": 3.0473684210526315, |
| "grad_norm": 5.578834533691406, |
| "learning_rate": 6.526315789473685e-06, |
| "loss": 0.3549, |
| "step": 660 |
| }, |
| { |
| "epoch": 3.0526315789473686, |
| "grad_norm": 24.610780715942383, |
| "learning_rate": 6.473684210526316e-06, |
| "loss": 0.4142, |
| "step": 670 |
| }, |
| { |
| "epoch": 3.057894736842105, |
| "grad_norm": 2.36200213432312, |
| "learning_rate": 6.421052631578948e-06, |
| "loss": 0.284, |
| "step": 680 |
| }, |
| { |
| "epoch": 3.0631578947368423, |
| "grad_norm": 5.415768623352051, |
| "learning_rate": 6.3684210526315795e-06, |
| "loss": 0.3369, |
| "step": 690 |
| }, |
| { |
| "epoch": 3.068421052631579, |
| "grad_norm": 1.3822064399719238, |
| "learning_rate": 6.31578947368421e-06, |
| "loss": 0.3621, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.0736842105263156, |
| "grad_norm": 7.259896278381348, |
| "learning_rate": 6.263157894736842e-06, |
| "loss": 0.421, |
| "step": 710 |
| }, |
| { |
| "epoch": 3.0789473684210527, |
| "grad_norm": 29.047313690185547, |
| "learning_rate": 6.2105263157894745e-06, |
| "loss": 0.5442, |
| "step": 720 |
| }, |
| { |
| "epoch": 3.0842105263157893, |
| "grad_norm": 7.356611728668213, |
| "learning_rate": 6.157894736842106e-06, |
| "loss": 0.3321, |
| "step": 730 |
| }, |
| { |
| "epoch": 3.0894736842105264, |
| "grad_norm": 17.060741424560547, |
| "learning_rate": 6.105263157894738e-06, |
| "loss": 0.3558, |
| "step": 740 |
| }, |
| { |
| "epoch": 3.094736842105263, |
| "grad_norm": 9.117140769958496, |
| "learning_rate": 6.0526315789473685e-06, |
| "loss": 0.4388, |
| "step": 750 |
| }, |
| { |
| "epoch": 3.1, |
| "grad_norm": 20.096881866455078, |
| "learning_rate": 6e-06, |
| "loss": 0.4363, |
| "step": 760 |
| }, |
| { |
| "epoch": 3.1, |
| "eval_accuracy": 0.84375, |
| "eval_f1": 0.8435972629521016, |
| "eval_loss": 0.3822278082370758, |
| "eval_precision": 0.8450980392156863, |
| "eval_runtime": 67.3361, |
| "eval_samples_per_second": 2.376, |
| "eval_steps_per_second": 0.297, |
| "step": 760 |
| }, |
| { |
| "epoch": 4.005263157894737, |
| "grad_norm": 11.768166542053223, |
| "learning_rate": 5.947368421052632e-06, |
| "loss": 0.5369, |
| "step": 770 |
| }, |
| { |
| "epoch": 4.010526315789473, |
| "grad_norm": 13.316620826721191, |
| "learning_rate": 5.8947368421052634e-06, |
| "loss": 0.2579, |
| "step": 780 |
| }, |
| { |
| "epoch": 4.015789473684211, |
| "grad_norm": 9.412049293518066, |
| "learning_rate": 5.842105263157896e-06, |
| "loss": 0.239, |
| "step": 790 |
| }, |
| { |
| "epoch": 4.021052631578947, |
| "grad_norm": 2.2383475303649902, |
| "learning_rate": 5.789473684210527e-06, |
| "loss": 0.3057, |
| "step": 800 |
| }, |
| { |
| "epoch": 4.026315789473684, |
| "grad_norm": 20.319499969482422, |
| "learning_rate": 5.736842105263158e-06, |
| "loss": 0.2473, |
| "step": 810 |
| }, |
| { |
| "epoch": 4.031578947368421, |
| "grad_norm": 2.4087274074554443, |
| "learning_rate": 5.68421052631579e-06, |
| "loss": 0.3685, |
| "step": 820 |
| }, |
| { |
| "epoch": 4.036842105263158, |
| "grad_norm": 19.121938705444336, |
| "learning_rate": 5.631578947368422e-06, |
| "loss": 0.2928, |
| "step": 830 |
| }, |
| { |
| "epoch": 4.042105263157895, |
| "grad_norm": 23.854658126831055, |
| "learning_rate": 5.578947368421052e-06, |
| "loss": 0.4694, |
| "step": 840 |
| }, |
| { |
| "epoch": 4.0473684210526315, |
| "grad_norm": 14.487116813659668, |
| "learning_rate": 5.526315789473685e-06, |
| "loss": 0.4689, |
| "step": 850 |
| }, |
| { |
| "epoch": 4.052631578947368, |
| "grad_norm": 10.47803020477295, |
| "learning_rate": 5.4736842105263165e-06, |
| "loss": 0.3579, |
| "step": 860 |
| }, |
| { |
| "epoch": 4.057894736842106, |
| "grad_norm": 6.377836227416992, |
| "learning_rate": 5.421052631578948e-06, |
| "loss": 0.3246, |
| "step": 870 |
| }, |
| { |
| "epoch": 4.063157894736842, |
| "grad_norm": 10.962491989135742, |
| "learning_rate": 5.36842105263158e-06, |
| "loss": 0.2626, |
| "step": 880 |
| }, |
| { |
| "epoch": 4.068421052631579, |
| "grad_norm": 15.242006301879883, |
| "learning_rate": 5.315789473684211e-06, |
| "loss": 0.4136, |
| "step": 890 |
| }, |
| { |
| "epoch": 4.073684210526316, |
| "grad_norm": 13.032598495483398, |
| "learning_rate": 5.263157894736842e-06, |
| "loss": 0.4153, |
| "step": 900 |
| }, |
| { |
| "epoch": 4.078947368421052, |
| "grad_norm": 17.038894653320312, |
| "learning_rate": 5.210526315789474e-06, |
| "loss": 0.3384, |
| "step": 910 |
| }, |
| { |
| "epoch": 4.08421052631579, |
| "grad_norm": 6.975522518157959, |
| "learning_rate": 5.157894736842106e-06, |
| "loss": 0.2313, |
| "step": 920 |
| }, |
| { |
| "epoch": 4.089473684210526, |
| "grad_norm": 17.41042137145996, |
| "learning_rate": 5.105263157894738e-06, |
| "loss": 0.2923, |
| "step": 930 |
| }, |
| { |
| "epoch": 4.094736842105263, |
| "grad_norm": 8.513750076293945, |
| "learning_rate": 5.052631578947369e-06, |
| "loss": 0.1948, |
| "step": 940 |
| }, |
| { |
| "epoch": 4.1, |
| "grad_norm": 9.249213218688965, |
| "learning_rate": 5e-06, |
| "loss": 0.4551, |
| "step": 950 |
| }, |
| { |
| "epoch": 4.1, |
| "eval_accuracy": 0.8375, |
| "eval_f1": 0.8337595907928389, |
| "eval_loss": 0.567510724067688, |
| "eval_precision": 0.8708791208791208, |
| "eval_runtime": 69.8935, |
| "eval_samples_per_second": 2.289, |
| "eval_steps_per_second": 0.286, |
| "step": 950 |
| }, |
| { |
| "epoch": 5.005263157894737, |
| "grad_norm": 12.310464859008789, |
| "learning_rate": 4.947368421052632e-06, |
| "loss": 0.4447, |
| "step": 960 |
| }, |
| { |
| "epoch": 5.010526315789473, |
| "grad_norm": 6.919663429260254, |
| "learning_rate": 4.894736842105264e-06, |
| "loss": 0.3302, |
| "step": 970 |
| }, |
| { |
| "epoch": 5.015789473684211, |
| "grad_norm": 5.0730133056640625, |
| "learning_rate": 4.842105263157895e-06, |
| "loss": 0.1817, |
| "step": 980 |
| }, |
| { |
| "epoch": 5.021052631578947, |
| "grad_norm": 25.77090835571289, |
| "learning_rate": 4.789473684210527e-06, |
| "loss": 0.3793, |
| "step": 990 |
| }, |
| { |
| "epoch": 5.026315789473684, |
| "grad_norm": 19.587158203125, |
| "learning_rate": 4.736842105263158e-06, |
| "loss": 0.2926, |
| "step": 1000 |
| }, |
| { |
| "epoch": 5.031578947368421, |
| "grad_norm": 17.99262809753418, |
| "learning_rate": 4.68421052631579e-06, |
| "loss": 0.5002, |
| "step": 1010 |
| }, |
| { |
| "epoch": 5.036842105263158, |
| "grad_norm": 9.718439102172852, |
| "learning_rate": 4.631578947368421e-06, |
| "loss": 0.2441, |
| "step": 1020 |
| }, |
| { |
| "epoch": 5.042105263157895, |
| "grad_norm": 17.285890579223633, |
| "learning_rate": 4.578947368421053e-06, |
| "loss": 0.4185, |
| "step": 1030 |
| }, |
| { |
| "epoch": 5.0473684210526315, |
| "grad_norm": 9.232149124145508, |
| "learning_rate": 4.526315789473685e-06, |
| "loss": 0.2818, |
| "step": 1040 |
| }, |
| { |
| "epoch": 5.052631578947368, |
| "grad_norm": 22.14645004272461, |
| "learning_rate": 4.473684210526316e-06, |
| "loss": 0.3087, |
| "step": 1050 |
| }, |
| { |
| "epoch": 5.057894736842106, |
| "grad_norm": 11.559210777282715, |
| "learning_rate": 4.4210526315789476e-06, |
| "loss": 0.4542, |
| "step": 1060 |
| }, |
| { |
| "epoch": 5.063157894736842, |
| "grad_norm": 13.807418823242188, |
| "learning_rate": 4.368421052631579e-06, |
| "loss": 0.3877, |
| "step": 1070 |
| }, |
| { |
| "epoch": 5.068421052631579, |
| "grad_norm": 13.337950706481934, |
| "learning_rate": 4.315789473684211e-06, |
| "loss": 0.4411, |
| "step": 1080 |
| }, |
| { |
| "epoch": 5.073684210526316, |
| "grad_norm": 14.076698303222656, |
| "learning_rate": 4.2631578947368425e-06, |
| "loss": 0.2447, |
| "step": 1090 |
| }, |
| { |
| "epoch": 5.078947368421052, |
| "grad_norm": 12.362092971801758, |
| "learning_rate": 4.210526315789474e-06, |
| "loss": 0.5224, |
| "step": 1100 |
| }, |
| { |
| "epoch": 5.08421052631579, |
| "grad_norm": 2.9585299491882324, |
| "learning_rate": 4.157894736842106e-06, |
| "loss": 0.2479, |
| "step": 1110 |
| }, |
| { |
| "epoch": 5.089473684210526, |
| "grad_norm": 19.757415771484375, |
| "learning_rate": 4.105263157894737e-06, |
| "loss": 0.3588, |
| "step": 1120 |
| }, |
| { |
| "epoch": 5.094736842105263, |
| "grad_norm": 23.674379348754883, |
| "learning_rate": 4.052631578947368e-06, |
| "loss": 0.4141, |
| "step": 1130 |
| }, |
| { |
| "epoch": 5.1, |
| "grad_norm": 20.916202545166016, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.2914, |
| "step": 1140 |
| }, |
| { |
| "epoch": 5.1, |
| "eval_accuracy": 0.81875, |
| "eval_f1": 0.8134373366571509, |
| "eval_loss": 0.6118867993354797, |
| "eval_precision": 0.8597249162405219, |
| "eval_runtime": 71.0105, |
| "eval_samples_per_second": 2.253, |
| "eval_steps_per_second": 0.282, |
| "step": 1140 |
| }, |
| { |
| "epoch": 6.005263157894737, |
| "grad_norm": 20.181005477905273, |
| "learning_rate": 3.947368421052632e-06, |
| "loss": 0.3204, |
| "step": 1150 |
| }, |
| { |
| "epoch": 6.010526315789473, |
| "grad_norm": 21.946392059326172, |
| "learning_rate": 3.894736842105263e-06, |
| "loss": 0.4591, |
| "step": 1160 |
| }, |
| { |
| "epoch": 6.015789473684211, |
| "grad_norm": 16.40288543701172, |
| "learning_rate": 3.842105263157895e-06, |
| "loss": 0.2527, |
| "step": 1170 |
| }, |
| { |
| "epoch": 6.021052631578947, |
| "grad_norm": 22.849788665771484, |
| "learning_rate": 3.789473684210527e-06, |
| "loss": 0.419, |
| "step": 1180 |
| }, |
| { |
| "epoch": 6.026315789473684, |
| "grad_norm": 4.809814453125, |
| "learning_rate": 3.736842105263158e-06, |
| "loss": 0.2078, |
| "step": 1190 |
| }, |
| { |
| "epoch": 6.031578947368421, |
| "grad_norm": 19.384782791137695, |
| "learning_rate": 3.6842105263157896e-06, |
| "loss": 0.3582, |
| "step": 1200 |
| }, |
| { |
| "epoch": 6.036842105263158, |
| "grad_norm": 23.3424129486084, |
| "learning_rate": 3.6315789473684217e-06, |
| "loss": 0.3567, |
| "step": 1210 |
| }, |
| { |
| "epoch": 6.042105263157895, |
| "grad_norm": 3.446596145629883, |
| "learning_rate": 3.578947368421053e-06, |
| "loss": 0.2239, |
| "step": 1220 |
| }, |
| { |
| "epoch": 6.0473684210526315, |
| "grad_norm": 20.072446823120117, |
| "learning_rate": 3.5263157894736846e-06, |
| "loss": 0.3906, |
| "step": 1230 |
| }, |
| { |
| "epoch": 6.052631578947368, |
| "grad_norm": 7.216496467590332, |
| "learning_rate": 3.473684210526316e-06, |
| "loss": 0.3055, |
| "step": 1240 |
| }, |
| { |
| "epoch": 6.057894736842106, |
| "grad_norm": 8.636588096618652, |
| "learning_rate": 3.421052631578948e-06, |
| "loss": 0.1265, |
| "step": 1250 |
| }, |
| { |
| "epoch": 6.063157894736842, |
| "grad_norm": 0.8843567371368408, |
| "learning_rate": 3.368421052631579e-06, |
| "loss": 0.3457, |
| "step": 1260 |
| }, |
| { |
| "epoch": 6.068421052631579, |
| "grad_norm": 20.05299949645996, |
| "learning_rate": 3.3157894736842107e-06, |
| "loss": 0.3391, |
| "step": 1270 |
| }, |
| { |
| "epoch": 6.073684210526316, |
| "grad_norm": 18.296667098999023, |
| "learning_rate": 3.2631578947368423e-06, |
| "loss": 0.4391, |
| "step": 1280 |
| }, |
| { |
| "epoch": 6.078947368421052, |
| "grad_norm": 12.995136260986328, |
| "learning_rate": 3.210526315789474e-06, |
| "loss": 0.396, |
| "step": 1290 |
| }, |
| { |
| "epoch": 6.08421052631579, |
| "grad_norm": 16.825014114379883, |
| "learning_rate": 3.157894736842105e-06, |
| "loss": 0.3864, |
| "step": 1300 |
| }, |
| { |
| "epoch": 6.089473684210526, |
| "grad_norm": 0.5085676908493042, |
| "learning_rate": 3.1052631578947372e-06, |
| "loss": 0.323, |
| "step": 1310 |
| }, |
| { |
| "epoch": 6.094736842105263, |
| "grad_norm": 26.47535514831543, |
| "learning_rate": 3.052631578947369e-06, |
| "loss": 0.4595, |
| "step": 1320 |
| }, |
| { |
| "epoch": 6.1, |
| "grad_norm": 18.22189712524414, |
| "learning_rate": 3e-06, |
| "loss": 0.5212, |
| "step": 1330 |
| }, |
| { |
| "epoch": 6.1, |
| "eval_accuracy": 0.84375, |
| "eval_f1": 0.8410111689653803, |
| "eval_loss": 0.5304452180862427, |
| "eval_precision": 0.8691894613190133, |
| "eval_runtime": 70.9081, |
| "eval_samples_per_second": 2.256, |
| "eval_steps_per_second": 0.282, |
| "step": 1330 |
| }, |
| { |
| "epoch": 6.1, |
| "step": 1330, |
| "total_flos": 0.0, |
| "train_loss": 0.6979074507727658, |
| "train_runtime": 5881.3964, |
| "train_samples_per_second": 2.584, |
| "train_steps_per_second": 0.323 |
| }, |
| { |
| "epoch": 6.1, |
| "eval_accuracy": 0.895, |
| "eval_f1": 0.894989498949895, |
| "eval_loss": 0.3164408206939697, |
| "eval_precision": 0.8951580632252901, |
| "eval_runtime": 659.8661, |
| "eval_samples_per_second": 1.212, |
| "eval_steps_per_second": 0.152, |
| "step": 1330 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1900, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.005 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|