| { | |
| "best_metric": 0.848235547542572, | |
| "best_model_checkpoint": "Action_Classification/checkpoint-300", | |
| "epoch": 15.0, | |
| "eval_steps": 100, | |
| "global_step": 4710, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 6.323575019836426, | |
| "learning_rate": 0.00019957537154989386, | |
| "loss": 0.4771, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 3.951721429824829, | |
| "learning_rate": 0.0001991507430997877, | |
| "loss": 0.4721, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 4.95866584777832, | |
| "learning_rate": 0.00019872611464968155, | |
| "loss": 0.5038, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 5.4051642417907715, | |
| "learning_rate": 0.00019830148619957538, | |
| "loss": 0.5058, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 5.13778018951416, | |
| "learning_rate": 0.00019787685774946923, | |
| "loss": 0.508, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 3.9513375759124756, | |
| "learning_rate": 0.00019745222929936306, | |
| "loss": 0.5479, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 3.7185003757476807, | |
| "learning_rate": 0.00019702760084925691, | |
| "loss": 0.4929, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 3.2211806774139404, | |
| "learning_rate": 0.00019660297239915074, | |
| "loss": 0.4391, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 4.076002597808838, | |
| "learning_rate": 0.0001961783439490446, | |
| "loss": 0.3744, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 5.571686744689941, | |
| "learning_rate": 0.00019575371549893845, | |
| "loss": 0.3922, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_accuracy": 0.6933333333333334, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 66, | |
| 1, | |
| 9, | |
| 6, | |
| 1, | |
| 5, | |
| 1, | |
| 3, | |
| 7, | |
| 1 | |
| ], | |
| [ | |
| 41, | |
| 96, | |
| 0, | |
| 0, | |
| 8, | |
| 0, | |
| 2, | |
| 1, | |
| 45, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 46, | |
| 1, | |
| 1, | |
| 7, | |
| 4, | |
| 0, | |
| 1, | |
| 3 | |
| ], | |
| [ | |
| 9, | |
| 1, | |
| 4, | |
| 19, | |
| 5, | |
| 3, | |
| 2, | |
| 1, | |
| 4, | |
| 4 | |
| ], | |
| [ | |
| 0, | |
| 2, | |
| 0, | |
| 3, | |
| 84, | |
| 2, | |
| 1, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 4, | |
| 1, | |
| 3, | |
| 2, | |
| 0, | |
| 55, | |
| 3, | |
| 1, | |
| 3, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 1, | |
| 54, | |
| 0, | |
| 3, | |
| 0 | |
| ], | |
| [ | |
| 5, | |
| 1, | |
| 4, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 70, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 5, | |
| 12, | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 14, | |
| 0, | |
| 147, | |
| 0 | |
| ], | |
| [ | |
| 9, | |
| 0, | |
| 1, | |
| 38, | |
| 3, | |
| 1, | |
| 4, | |
| 4, | |
| 2, | |
| 91 | |
| ] | |
| ], | |
| "eval_loss": 1.0780633687973022, | |
| "eval_runtime": 15.488, | |
| "eval_samples_per_second": 67.794, | |
| "eval_steps_per_second": 4.261, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 2.284886121749878, | |
| "learning_rate": 0.00019532908704883228, | |
| "loss": 0.5509, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 2.23751163482666, | |
| "learning_rate": 0.00019490445859872614, | |
| "loss": 0.4004, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 3.910186290740967, | |
| "learning_rate": 0.00019447983014861996, | |
| "loss": 0.4871, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 5.422680377960205, | |
| "learning_rate": 0.00019405520169851382, | |
| "loss": 0.3791, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 3.270318031311035, | |
| "learning_rate": 0.00019363057324840765, | |
| "loss": 0.4276, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 4.393375873565674, | |
| "learning_rate": 0.0001932059447983015, | |
| "loss": 0.5457, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 4.6365885734558105, | |
| "learning_rate": 0.00019278131634819533, | |
| "loss": 0.4431, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 3.3216943740844727, | |
| "learning_rate": 0.00019235668789808918, | |
| "loss": 0.3425, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 4.078379154205322, | |
| "learning_rate": 0.000191932059447983, | |
| "loss": 0.4102, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.2445045709609985, | |
| "learning_rate": 0.00019150743099787687, | |
| "loss": 0.439, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_accuracy": 0.7561904761904762, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 73, | |
| 3, | |
| 6, | |
| 4, | |
| 0, | |
| 3, | |
| 2, | |
| 3, | |
| 3, | |
| 3 | |
| ], | |
| [ | |
| 30, | |
| 121, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 8, | |
| 0, | |
| 32, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 47, | |
| 1, | |
| 1, | |
| 9, | |
| 1, | |
| 0, | |
| 1, | |
| 4 | |
| ], | |
| [ | |
| 7, | |
| 0, | |
| 5, | |
| 28, | |
| 5, | |
| 1, | |
| 0, | |
| 1, | |
| 2, | |
| 3 | |
| ], | |
| [ | |
| 0, | |
| 2, | |
| 0, | |
| 1, | |
| 88, | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 4, | |
| 1, | |
| 5, | |
| 3, | |
| 2, | |
| 51, | |
| 0, | |
| 1, | |
| 2, | |
| 3 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 56, | |
| 0, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 4, | |
| 2, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 74, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 4, | |
| 28, | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 19, | |
| 2, | |
| 125, | |
| 1 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 1, | |
| 15, | |
| 1, | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 131 | |
| ] | |
| ], | |
| "eval_loss": 0.8592283129692078, | |
| "eval_runtime": 15.2608, | |
| "eval_samples_per_second": 68.804, | |
| "eval_steps_per_second": 4.325, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 5.532979965209961, | |
| "learning_rate": 0.00019108280254777072, | |
| "loss": 0.3894, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 1.3239492177963257, | |
| "learning_rate": 0.00019065817409766455, | |
| "loss": 0.3737, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 7.039944171905518, | |
| "learning_rate": 0.0001902335456475584, | |
| "loss": 0.5094, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 3.1441848278045654, | |
| "learning_rate": 0.00018980891719745223, | |
| "loss": 0.4595, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 4.0012054443359375, | |
| "learning_rate": 0.0001893842887473461, | |
| "loss": 0.4973, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 3.918905735015869, | |
| "learning_rate": 0.00018895966029723992, | |
| "loss": 0.5684, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 3.0025856494903564, | |
| "learning_rate": 0.00018853503184713377, | |
| "loss": 0.4679, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 4.386856555938721, | |
| "learning_rate": 0.0001881104033970276, | |
| "loss": 0.3681, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 4.106396198272705, | |
| "learning_rate": 0.00018768577494692146, | |
| "loss": 0.5392, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 3.501603126525879, | |
| "learning_rate": 0.0001872611464968153, | |
| "loss": 0.4664, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_accuracy": 0.7628571428571429, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 45, | |
| 5, | |
| 20, | |
| 4, | |
| 2, | |
| 6, | |
| 4, | |
| 8, | |
| 3, | |
| 3 | |
| ], | |
| [ | |
| 5, | |
| 154, | |
| 4, | |
| 2, | |
| 1, | |
| 2, | |
| 6, | |
| 1, | |
| 17, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 51, | |
| 1, | |
| 2, | |
| 8, | |
| 1, | |
| 0, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 8, | |
| 26, | |
| 8, | |
| 5, | |
| 0, | |
| 0, | |
| 1, | |
| 3 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 89, | |
| 3, | |
| 0, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 11, | |
| 3, | |
| 1, | |
| 55, | |
| 0, | |
| 1, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 3, | |
| 3, | |
| 51, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 10, | |
| 1, | |
| 0, | |
| 4, | |
| 0, | |
| 68, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 26, | |
| 5, | |
| 0, | |
| 1, | |
| 3, | |
| 16, | |
| 1, | |
| 127, | |
| 1 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 2, | |
| 9, | |
| 2, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 135 | |
| ] | |
| ], | |
| "eval_loss": 0.848235547542572, | |
| "eval_runtime": 15.3566, | |
| "eval_samples_per_second": 68.375, | |
| "eval_steps_per_second": 4.298, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 3.331725835800171, | |
| "learning_rate": 0.00018683651804670914, | |
| "loss": 0.493, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 4.574372291564941, | |
| "learning_rate": 0.000186411889596603, | |
| "loss": 0.3776, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 4.206468105316162, | |
| "learning_rate": 0.00018598726114649682, | |
| "loss": 0.4086, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 4.6000657081604, | |
| "learning_rate": 0.00018556263269639068, | |
| "loss": 0.4096, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 2.429077625274658, | |
| "learning_rate": 0.0001851380042462845, | |
| "loss": 0.3707, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 1.4174879789352417, | |
| "learning_rate": 0.00018471337579617836, | |
| "loss": 0.3507, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 5.691073894500732, | |
| "learning_rate": 0.0001842887473460722, | |
| "loss": 0.3365, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 4.105412483215332, | |
| "learning_rate": 0.00018386411889596604, | |
| "loss": 0.3132, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 1.423787236213684, | |
| "learning_rate": 0.00018343949044585987, | |
| "loss": 0.2726, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 3.5701773166656494, | |
| "learning_rate": 0.00018301486199575373, | |
| "loss": 0.2929, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "eval_accuracy": 0.679047619047619, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 65, | |
| 3, | |
| 9, | |
| 7, | |
| 1, | |
| 1, | |
| 2, | |
| 2, | |
| 10, | |
| 0 | |
| ], | |
| [ | |
| 38, | |
| 113, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 5, | |
| 0, | |
| 35, | |
| 0 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 54, | |
| 4, | |
| 1, | |
| 1, | |
| 2, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 8, | |
| 2, | |
| 5, | |
| 31, | |
| 5, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 2, | |
| 6, | |
| 3, | |
| 80, | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 6, | |
| 2, | |
| 16, | |
| 8, | |
| 1, | |
| 34, | |
| 1, | |
| 1, | |
| 3, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 2, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 55, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 6, | |
| 2, | |
| 6, | |
| 2, | |
| 0, | |
| 0, | |
| 0, | |
| 66, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 3, | |
| 24, | |
| 2, | |
| 2, | |
| 0, | |
| 0, | |
| 14, | |
| 0, | |
| 135, | |
| 0 | |
| ], | |
| [ | |
| 9, | |
| 2, | |
| 4, | |
| 56, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 80 | |
| ] | |
| ], | |
| "eval_loss": 1.1280611753463745, | |
| "eval_runtime": 15.4285, | |
| "eval_samples_per_second": 68.056, | |
| "eval_steps_per_second": 4.278, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 5.247598171234131, | |
| "learning_rate": 0.00018259023354564758, | |
| "loss": 0.3167, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 1.9821741580963135, | |
| "learning_rate": 0.0001821656050955414, | |
| "loss": 0.3745, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 3.7514052391052246, | |
| "learning_rate": 0.00018174097664543526, | |
| "loss": 0.3996, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 4.241259574890137, | |
| "learning_rate": 0.0001813163481953291, | |
| "loss": 0.2942, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 3.530998945236206, | |
| "learning_rate": 0.00018089171974522295, | |
| "loss": 0.3888, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 1.6759068965911865, | |
| "learning_rate": 0.00018046709129511678, | |
| "loss": 0.26, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 3.3739516735076904, | |
| "learning_rate": 0.00018004246284501063, | |
| "loss": 0.4405, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 4.361608982086182, | |
| "learning_rate": 0.00017961783439490446, | |
| "loss": 0.4011, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 2.788034200668335, | |
| "learning_rate": 0.00017919320594479831, | |
| "loss": 0.3427, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "grad_norm": 4.486495494842529, | |
| "learning_rate": 0.00017876857749469217, | |
| "loss": 0.4188, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "eval_accuracy": 0.6657142857142857, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 61, | |
| 2, | |
| 11, | |
| 6, | |
| 3, | |
| 5, | |
| 2, | |
| 5, | |
| 4, | |
| 1 | |
| ], | |
| [ | |
| 53, | |
| 85, | |
| 5, | |
| 0, | |
| 8, | |
| 5, | |
| 3, | |
| 6, | |
| 28, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 51, | |
| 2, | |
| 2, | |
| 5, | |
| 1, | |
| 3, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 4, | |
| 34, | |
| 8, | |
| 0, | |
| 0, | |
| 2, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 89, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 7, | |
| 4, | |
| 5, | |
| 48, | |
| 1, | |
| 5, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 3, | |
| 0, | |
| 54, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 5, | |
| 1, | |
| 1, | |
| 1, | |
| 0, | |
| 3, | |
| 0, | |
| 72, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 11, | |
| 18, | |
| 0, | |
| 0, | |
| 2, | |
| 2, | |
| 17, | |
| 8, | |
| 122, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 1, | |
| 2, | |
| 42, | |
| 8, | |
| 1, | |
| 10, | |
| 4, | |
| 1, | |
| 83 | |
| ] | |
| ], | |
| "eval_loss": 1.185118556022644, | |
| "eval_runtime": 15.3843, | |
| "eval_samples_per_second": 68.251, | |
| "eval_steps_per_second": 4.29, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 4.058138370513916, | |
| "learning_rate": 0.000178343949044586, | |
| "loss": 0.3391, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 3.33333683013916, | |
| "learning_rate": 0.00017791932059447985, | |
| "loss": 0.3716, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 4.987284183502197, | |
| "learning_rate": 0.00017749469214437368, | |
| "loss": 0.421, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 4.609795570373535, | |
| "learning_rate": 0.00017707006369426754, | |
| "loss": 0.3626, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 2.64127254486084, | |
| "learning_rate": 0.00017664543524416136, | |
| "loss": 0.3032, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 5.4108357429504395, | |
| "learning_rate": 0.00017622080679405522, | |
| "loss": 0.3659, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "grad_norm": 1.7708959579467773, | |
| "learning_rate": 0.00017579617834394905, | |
| "loss": 0.3496, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 4.026036262512207, | |
| "learning_rate": 0.0001753715498938429, | |
| "loss": 0.3223, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 5.661407470703125, | |
| "learning_rate": 0.00017494692144373676, | |
| "loss": 0.3881, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 3.0189332962036133, | |
| "learning_rate": 0.00017452229299363059, | |
| "loss": 0.3668, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "eval_accuracy": 0.7466666666666667, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 53, | |
| 11, | |
| 11, | |
| 5, | |
| 0, | |
| 3, | |
| 1, | |
| 4, | |
| 10, | |
| 2 | |
| ], | |
| [ | |
| 3, | |
| 145, | |
| 5, | |
| 0, | |
| 1, | |
| 1, | |
| 4, | |
| 5, | |
| 29, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 53, | |
| 1, | |
| 1, | |
| 5, | |
| 2, | |
| 1, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 9, | |
| 29, | |
| 5, | |
| 2, | |
| 0, | |
| 0, | |
| 1, | |
| 2 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 4, | |
| 3, | |
| 84, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 2, | |
| 12, | |
| 3, | |
| 1, | |
| 45, | |
| 0, | |
| 3, | |
| 3, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 2, | |
| 1, | |
| 1, | |
| 0, | |
| 52, | |
| 0, | |
| 2, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 2, | |
| 5, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 73, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 4, | |
| 29, | |
| 2, | |
| 0, | |
| 0, | |
| 0, | |
| 7, | |
| 3, | |
| 135, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 11, | |
| 19, | |
| 1, | |
| 5, | |
| 0, | |
| 1, | |
| 0, | |
| 115 | |
| ] | |
| ], | |
| "eval_loss": 0.8554251790046692, | |
| "eval_runtime": 15.3855, | |
| "eval_samples_per_second": 68.246, | |
| "eval_steps_per_second": 4.29, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 4.359609127044678, | |
| "learning_rate": 0.00017409766454352444, | |
| "loss": 0.3581, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 3.1502652168273926, | |
| "learning_rate": 0.00017367303609341827, | |
| "loss": 0.375, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 2.531296968460083, | |
| "learning_rate": 0.00017324840764331212, | |
| "loss": 0.3344, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 4.272879600524902, | |
| "learning_rate": 0.00017282377919320595, | |
| "loss": 0.2658, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 6.005029678344727, | |
| "learning_rate": 0.0001723991507430998, | |
| "loss": 0.2963, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 2.308213710784912, | |
| "learning_rate": 0.00017197452229299363, | |
| "loss": 0.3024, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 2.6260054111480713, | |
| "learning_rate": 0.0001715498938428875, | |
| "loss": 0.3372, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 5.7114057540893555, | |
| "learning_rate": 0.00017112526539278132, | |
| "loss": 0.3561, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 2.8753015995025635, | |
| "learning_rate": 0.00017070063694267517, | |
| "loss": 0.3529, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 1.4016531705856323, | |
| "learning_rate": 0.00017027600849256903, | |
| "loss": 0.342, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "eval_accuracy": 0.7047619047619048, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 58, | |
| 5, | |
| 4, | |
| 4, | |
| 1, | |
| 8, | |
| 1, | |
| 9, | |
| 7, | |
| 3 | |
| ], | |
| [ | |
| 36, | |
| 111, | |
| 0, | |
| 2, | |
| 1, | |
| 4, | |
| 1, | |
| 4, | |
| 34, | |
| 0 | |
| ], | |
| [ | |
| 3, | |
| 2, | |
| 45, | |
| 5, | |
| 1, | |
| 6, | |
| 1, | |
| 1, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 6, | |
| 0, | |
| 5, | |
| 35, | |
| 2, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 3 | |
| ], | |
| [ | |
| 1, | |
| 1, | |
| 2, | |
| 6, | |
| 77, | |
| 3, | |
| 1, | |
| 0, | |
| 2, | |
| 0 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 10, | |
| 8, | |
| 1, | |
| 39, | |
| 0, | |
| 7, | |
| 2, | |
| 1 | |
| ], | |
| [ | |
| 1, | |
| 2, | |
| 1, | |
| 0, | |
| 2, | |
| 0, | |
| 50, | |
| 0, | |
| 3, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 4, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 77, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 4, | |
| 29, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 5, | |
| 2, | |
| 140, | |
| 0 | |
| ], | |
| [ | |
| 5, | |
| 0, | |
| 5, | |
| 27, | |
| 0, | |
| 0, | |
| 1, | |
| 7, | |
| 0, | |
| 108 | |
| ] | |
| ], | |
| "eval_loss": 1.0290604829788208, | |
| "eval_runtime": 15.7598, | |
| "eval_samples_per_second": 66.625, | |
| "eval_steps_per_second": 4.188, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 3.78086256980896, | |
| "learning_rate": 0.00016985138004246286, | |
| "loss": 0.2987, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 1.8382972478866577, | |
| "learning_rate": 0.0001694267515923567, | |
| "loss": 0.4537, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 4.91944694519043, | |
| "learning_rate": 0.00016900212314225054, | |
| "loss": 0.264, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 3.87353515625, | |
| "learning_rate": 0.0001685774946921444, | |
| "loss": 0.3189, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "grad_norm": 2.6051061153411865, | |
| "learning_rate": 0.00016815286624203822, | |
| "loss": 0.2776, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 3.5838241577148438, | |
| "learning_rate": 0.00016772823779193208, | |
| "loss": 0.286, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 3.6773152351379395, | |
| "learning_rate": 0.0001673036093418259, | |
| "loss": 0.2957, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 1.5790090560913086, | |
| "learning_rate": 0.00016687898089171976, | |
| "loss": 0.3021, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 1.9358294010162354, | |
| "learning_rate": 0.00016645435244161362, | |
| "loss": 0.2431, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 3.2236168384552, | |
| "learning_rate": 0.00016602972399150744, | |
| "loss": 0.2984, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "eval_accuracy": 0.6961904761904761, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 55, | |
| 3, | |
| 11, | |
| 2, | |
| 0, | |
| 1, | |
| 2, | |
| 11, | |
| 10, | |
| 5 | |
| ], | |
| [ | |
| 44, | |
| 71, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 4, | |
| 5, | |
| 66, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 49, | |
| 3, | |
| 2, | |
| 3, | |
| 3, | |
| 2, | |
| 1, | |
| 2 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 5, | |
| 26, | |
| 7, | |
| 0, | |
| 2, | |
| 0, | |
| 1, | |
| 7 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 86, | |
| 0, | |
| 3, | |
| 0, | |
| 3, | |
| 0 | |
| ], | |
| [ | |
| 5, | |
| 2, | |
| 12, | |
| 4, | |
| 2, | |
| 39, | |
| 1, | |
| 3, | |
| 1, | |
| 3 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 57, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 5, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 78, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 5, | |
| 15, | |
| 3, | |
| 0, | |
| 1, | |
| 0, | |
| 9, | |
| 4, | |
| 143, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 17, | |
| 1, | |
| 0, | |
| 3, | |
| 3, | |
| 1, | |
| 127 | |
| ] | |
| ], | |
| "eval_loss": 1.2206960916519165, | |
| "eval_runtime": 15.2799, | |
| "eval_samples_per_second": 68.718, | |
| "eval_steps_per_second": 4.319, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 3.2855160236358643, | |
| "learning_rate": 0.0001656050955414013, | |
| "loss": 0.3444, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 3.5719540119171143, | |
| "learning_rate": 0.00016518046709129513, | |
| "loss": 0.2846, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 4.042548179626465, | |
| "learning_rate": 0.00016475583864118898, | |
| "loss": 0.3975, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 2.08795166015625, | |
| "learning_rate": 0.0001643312101910828, | |
| "loss": 0.2734, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "grad_norm": 2.969775676727295, | |
| "learning_rate": 0.00016390658174097667, | |
| "loss": 0.2576, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 2.800004720687866, | |
| "learning_rate": 0.0001634819532908705, | |
| "loss": 0.2583, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "grad_norm": 3.8801493644714355, | |
| "learning_rate": 0.00016305732484076435, | |
| "loss": 0.3354, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 2.1636312007904053, | |
| "learning_rate": 0.0001626326963906582, | |
| "loss": 0.2068, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "grad_norm": 3.3073160648345947, | |
| "learning_rate": 0.00016220806794055203, | |
| "loss": 0.356, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 3.645033836364746, | |
| "learning_rate": 0.0001617834394904459, | |
| "loss": 0.3542, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "eval_accuracy": 0.6657142857142857, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 66, | |
| 2, | |
| 6, | |
| 8, | |
| 0, | |
| 4, | |
| 4, | |
| 4, | |
| 5, | |
| 1 | |
| ], | |
| [ | |
| 38, | |
| 78, | |
| 2, | |
| 0, | |
| 3, | |
| 1, | |
| 23, | |
| 4, | |
| 43, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 50, | |
| 7, | |
| 0, | |
| 5, | |
| 1, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 2, | |
| 45, | |
| 3, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 3, | |
| 6, | |
| 76, | |
| 3, | |
| 4, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 3, | |
| 1, | |
| 10, | |
| 8, | |
| 1, | |
| 47, | |
| 0, | |
| 0, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 2, | |
| 0, | |
| 1, | |
| 0, | |
| 56, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 11, | |
| 5, | |
| 0, | |
| 2, | |
| 4, | |
| 59, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 4, | |
| 24, | |
| 2, | |
| 1, | |
| 1, | |
| 1, | |
| 23, | |
| 0, | |
| 124, | |
| 0 | |
| ], | |
| [ | |
| 5, | |
| 0, | |
| 1, | |
| 42, | |
| 3, | |
| 0, | |
| 3, | |
| 1, | |
| 0, | |
| 98 | |
| ] | |
| ], | |
| "eval_loss": 1.18353271484375, | |
| "eval_runtime": 15.3461, | |
| "eval_samples_per_second": 68.421, | |
| "eval_steps_per_second": 4.301, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 1.6855131387710571, | |
| "learning_rate": 0.00016135881104033971, | |
| "loss": 0.2494, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "grad_norm": 2.0318846702575684, | |
| "learning_rate": 0.00016093418259023357, | |
| "loss": 0.2848, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 2.573373794555664, | |
| "learning_rate": 0.0001605095541401274, | |
| "loss": 0.2974, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 1.9912055730819702, | |
| "learning_rate": 0.00016008492569002125, | |
| "loss": 0.1985, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "grad_norm": 3.447814702987671, | |
| "learning_rate": 0.00015966029723991508, | |
| "loss": 0.2648, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "grad_norm": 2.786409616470337, | |
| "learning_rate": 0.00015923566878980894, | |
| "loss": 0.2106, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "grad_norm": 7.731870174407959, | |
| "learning_rate": 0.00015881104033970276, | |
| "loss": 0.2545, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "grad_norm": 5.680171966552734, | |
| "learning_rate": 0.00015838641188959662, | |
| "loss": 0.3026, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "grad_norm": 1.9310322999954224, | |
| "learning_rate": 0.00015796178343949047, | |
| "loss": 0.2678, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "grad_norm": 4.319097995758057, | |
| "learning_rate": 0.0001575371549893843, | |
| "loss": 0.2749, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "eval_accuracy": 0.7285714285714285, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 54, | |
| 12, | |
| 5, | |
| 2, | |
| 3, | |
| 1, | |
| 7, | |
| 1, | |
| 12, | |
| 3 | |
| ], | |
| [ | |
| 13, | |
| 155, | |
| 0, | |
| 0, | |
| 3, | |
| 1, | |
| 2, | |
| 1, | |
| 18, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 53, | |
| 1, | |
| 4, | |
| 1, | |
| 3, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 5, | |
| 1, | |
| 7, | |
| 21, | |
| 8, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 9 | |
| ], | |
| [ | |
| 0, | |
| 2, | |
| 0, | |
| 1, | |
| 89, | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 4, | |
| 16, | |
| 1, | |
| 6, | |
| 34, | |
| 3, | |
| 1, | |
| 4, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 2, | |
| 1, | |
| 0, | |
| 2, | |
| 0, | |
| 54, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 3, | |
| 6, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 70, | |
| 2, | |
| 0 | |
| ], | |
| [ | |
| 4, | |
| 45, | |
| 0, | |
| 1, | |
| 2, | |
| 0, | |
| 13, | |
| 0, | |
| 115, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 6, | |
| 19, | |
| 4, | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 120 | |
| ] | |
| ], | |
| "eval_loss": 0.9242235422134399, | |
| "eval_runtime": 15.2447, | |
| "eval_samples_per_second": 68.876, | |
| "eval_steps_per_second": 4.329, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "grad_norm": 1.9356917142868042, | |
| "learning_rate": 0.00015711252653927816, | |
| "loss": 0.3368, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "grad_norm": 1.9739004373550415, | |
| "learning_rate": 0.00015668789808917199, | |
| "loss": 0.3223, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "grad_norm": 2.2368195056915283, | |
| "learning_rate": 0.00015626326963906584, | |
| "loss": 0.2729, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "grad_norm": 2.3171839714050293, | |
| "learning_rate": 0.00015583864118895967, | |
| "loss": 0.2081, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "grad_norm": 4.041688442230225, | |
| "learning_rate": 0.00015541401273885352, | |
| "loss": 0.3985, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "grad_norm": 2.9750876426696777, | |
| "learning_rate": 0.00015498938428874735, | |
| "loss": 0.3221, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "grad_norm": 2.438103675842285, | |
| "learning_rate": 0.0001545647558386412, | |
| "loss": 0.2598, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "grad_norm": 3.9497318267822266, | |
| "learning_rate": 0.00015414012738853506, | |
| "loss": 0.2488, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "grad_norm": 4.278964996337891, | |
| "learning_rate": 0.0001537154989384289, | |
| "loss": 0.2826, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "grad_norm": 2.7707722187042236, | |
| "learning_rate": 0.00015329087048832275, | |
| "loss": 0.2695, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "eval_accuracy": 0.7314285714285714, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 58, | |
| 8, | |
| 9, | |
| 3, | |
| 0, | |
| 3, | |
| 2, | |
| 5, | |
| 10, | |
| 2 | |
| ], | |
| [ | |
| 29, | |
| 130, | |
| 2, | |
| 0, | |
| 0, | |
| 3, | |
| 1, | |
| 4, | |
| 24, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 49, | |
| 3, | |
| 1, | |
| 6, | |
| 2, | |
| 0, | |
| 0, | |
| 3 | |
| ], | |
| [ | |
| 6, | |
| 1, | |
| 5, | |
| 26, | |
| 6, | |
| 1, | |
| 0, | |
| 1, | |
| 1, | |
| 5 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 2, | |
| 4, | |
| 79, | |
| 1, | |
| 1, | |
| 1, | |
| 4, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 2, | |
| 12, | |
| 4, | |
| 1, | |
| 48, | |
| 1, | |
| 0, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 57, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 2, | |
| 4, | |
| 1, | |
| 0, | |
| 2, | |
| 2, | |
| 67, | |
| 3, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 32, | |
| 0, | |
| 1, | |
| 0, | |
| 4, | |
| 12, | |
| 2, | |
| 125, | |
| 3 | |
| ], | |
| [ | |
| 3, | |
| 1, | |
| 2, | |
| 10, | |
| 1, | |
| 5, | |
| 2, | |
| 0, | |
| 0, | |
| 129 | |
| ] | |
| ], | |
| "eval_loss": 0.9827544689178467, | |
| "eval_runtime": 15.624, | |
| "eval_samples_per_second": 67.204, | |
| "eval_steps_per_second": 4.224, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "grad_norm": 1.2058473825454712, | |
| "learning_rate": 0.00015286624203821657, | |
| "loss": 0.2301, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "grad_norm": 4.785125732421875, | |
| "learning_rate": 0.00015244161358811043, | |
| "loss": 0.1879, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 2.2282400131225586, | |
| "learning_rate": 0.00015201698513800426, | |
| "loss": 0.2526, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "grad_norm": 4.476231098175049, | |
| "learning_rate": 0.0001515923566878981, | |
| "loss": 0.2623, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "grad_norm": 1.7477174997329712, | |
| "learning_rate": 0.00015116772823779194, | |
| "loss": 0.2361, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "grad_norm": 1.9111158847808838, | |
| "learning_rate": 0.0001507430997876858, | |
| "loss": 0.2147, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "grad_norm": 2.058471918106079, | |
| "learning_rate": 0.00015031847133757962, | |
| "loss": 0.2567, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "grad_norm": 3.280287265777588, | |
| "learning_rate": 0.00014989384288747345, | |
| "loss": 0.3297, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "grad_norm": 4.744434356689453, | |
| "learning_rate": 0.0001494692144373673, | |
| "loss": 0.2678, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "grad_norm": 1.6539621353149414, | |
| "learning_rate": 0.00014904458598726113, | |
| "loss": 0.2343, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "eval_accuracy": 0.7295238095238096, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 58, | |
| 2, | |
| 10, | |
| 5, | |
| 1, | |
| 1, | |
| 7, | |
| 4, | |
| 10, | |
| 2 | |
| ], | |
| [ | |
| 21, | |
| 115, | |
| 0, | |
| 0, | |
| 4, | |
| 0, | |
| 12, | |
| 3, | |
| 38, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 53, | |
| 2, | |
| 2, | |
| 3, | |
| 0, | |
| 1, | |
| 1, | |
| 2 | |
| ], | |
| [ | |
| 2, | |
| 2, | |
| 9, | |
| 22, | |
| 9, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 6 | |
| ], | |
| [ | |
| 0, | |
| 2, | |
| 1, | |
| 1, | |
| 88, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 10, | |
| 4, | |
| 2, | |
| 48, | |
| 1, | |
| 2, | |
| 2, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 2, | |
| 0, | |
| 56, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 7, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 72, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 4, | |
| 22, | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 14, | |
| 0, | |
| 139, | |
| 0 | |
| ], | |
| [ | |
| 7, | |
| 0, | |
| 4, | |
| 20, | |
| 1, | |
| 3, | |
| 1, | |
| 2, | |
| 0, | |
| 115 | |
| ] | |
| ], | |
| "eval_loss": 1.0871163606643677, | |
| "eval_runtime": 15.2438, | |
| "eval_samples_per_second": 68.881, | |
| "eval_steps_per_second": 4.33, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "grad_norm": 2.8714191913604736, | |
| "learning_rate": 0.000148619957537155, | |
| "loss": 0.2571, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "grad_norm": 3.6686244010925293, | |
| "learning_rate": 0.00014819532908704882, | |
| "loss": 0.2531, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 1.589486837387085, | |
| "learning_rate": 0.00014777070063694267, | |
| "loss": 0.207, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "grad_norm": 2.834082841873169, | |
| "learning_rate": 0.00014734607218683653, | |
| "loss": 0.2296, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "grad_norm": 4.031200885772705, | |
| "learning_rate": 0.00014692144373673036, | |
| "loss": 0.2018, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "grad_norm": 2.4544031620025635, | |
| "learning_rate": 0.0001464968152866242, | |
| "loss": 0.2817, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 1.3553822040557861, | |
| "learning_rate": 0.00014607218683651804, | |
| "loss": 0.2868, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "grad_norm": 1.9867699146270752, | |
| "learning_rate": 0.0001456475583864119, | |
| "loss": 0.2127, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "grad_norm": 1.536102533340454, | |
| "learning_rate": 0.00014522292993630572, | |
| "loss": 0.2743, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "grad_norm": 2.0758562088012695, | |
| "learning_rate": 0.00014479830148619958, | |
| "loss": 0.2714, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "eval_accuracy": 0.7314285714285714, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 59, | |
| 6, | |
| 8, | |
| 8, | |
| 1, | |
| 1, | |
| 3, | |
| 4, | |
| 7, | |
| 3 | |
| ], | |
| [ | |
| 23, | |
| 114, | |
| 2, | |
| 1, | |
| 1, | |
| 0, | |
| 5, | |
| 3, | |
| 42, | |
| 2 | |
| ], | |
| [ | |
| 1, | |
| 1, | |
| 54, | |
| 2, | |
| 1, | |
| 2, | |
| 0, | |
| 1, | |
| 0, | |
| 3 | |
| ], | |
| [ | |
| 3, | |
| 1, | |
| 3, | |
| 32, | |
| 4, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 8 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 3, | |
| 5, | |
| 80, | |
| 1, | |
| 1, | |
| 0, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 3, | |
| 1, | |
| 11, | |
| 7, | |
| 2, | |
| 43, | |
| 1, | |
| 2, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 0, | |
| 56, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 4, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 77, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 6, | |
| 31, | |
| 2, | |
| 1, | |
| 0, | |
| 0, | |
| 10, | |
| 0, | |
| 130, | |
| 0 | |
| ], | |
| [ | |
| 5, | |
| 0, | |
| 1, | |
| 22, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 123 | |
| ] | |
| ], | |
| "eval_loss": 1.0719889402389526, | |
| "eval_runtime": 15.7109, | |
| "eval_samples_per_second": 66.833, | |
| "eval_steps_per_second": 4.201, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "grad_norm": 2.6378695964813232, | |
| "learning_rate": 0.0001443736730360934, | |
| "loss": 0.1884, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "grad_norm": 2.4962825775146484, | |
| "learning_rate": 0.00014394904458598726, | |
| "loss": 0.2421, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "grad_norm": 5.444900035858154, | |
| "learning_rate": 0.0001435244161358811, | |
| "loss": 0.1745, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "grad_norm": 3.8945775032043457, | |
| "learning_rate": 0.00014309978768577494, | |
| "loss": 0.3019, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "grad_norm": 4.094300270080566, | |
| "learning_rate": 0.0001426751592356688, | |
| "loss": 0.2902, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "grad_norm": 2.9124348163604736, | |
| "learning_rate": 0.00014225053078556263, | |
| "loss": 0.2889, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "grad_norm": 1.5475099086761475, | |
| "learning_rate": 0.00014182590233545648, | |
| "loss": 0.1914, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "grad_norm": 3.8749759197235107, | |
| "learning_rate": 0.0001414012738853503, | |
| "loss": 0.2835, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "grad_norm": 3.574009656906128, | |
| "learning_rate": 0.00014097664543524416, | |
| "loss": 0.2556, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "grad_norm": 1.3013603687286377, | |
| "learning_rate": 0.000140552016985138, | |
| "loss": 0.2287, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "eval_accuracy": 0.7057142857142857, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 52, | |
| 5, | |
| 15, | |
| 8, | |
| 1, | |
| 8, | |
| 0, | |
| 3, | |
| 6, | |
| 2 | |
| ], | |
| [ | |
| 27, | |
| 109, | |
| 1, | |
| 0, | |
| 1, | |
| 6, | |
| 2, | |
| 3, | |
| 43, | |
| 1 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 55, | |
| 3, | |
| 0, | |
| 3, | |
| 1, | |
| 0, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 4, | |
| 34, | |
| 4, | |
| 2, | |
| 0, | |
| 0, | |
| 0, | |
| 5 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 2, | |
| 4, | |
| 81, | |
| 2, | |
| 1, | |
| 0, | |
| 2, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 2, | |
| 7, | |
| 3, | |
| 1, | |
| 54, | |
| 0, | |
| 0, | |
| 3, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 56, | |
| 0, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 1, | |
| 12, | |
| 1, | |
| 0, | |
| 3, | |
| 2, | |
| 62, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 5, | |
| 30, | |
| 1, | |
| 0, | |
| 0, | |
| 3, | |
| 9, | |
| 0, | |
| 131, | |
| 1 | |
| ], | |
| [ | |
| 4, | |
| 6, | |
| 4, | |
| 28, | |
| 0, | |
| 4, | |
| 0, | |
| 0, | |
| 0, | |
| 107 | |
| ] | |
| ], | |
| "eval_loss": 1.112500786781311, | |
| "eval_runtime": 15.3686, | |
| "eval_samples_per_second": 68.321, | |
| "eval_steps_per_second": 4.294, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "grad_norm": 2.4055957794189453, | |
| "learning_rate": 0.00014012738853503185, | |
| "loss": 0.1898, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "grad_norm": 1.5158133506774902, | |
| "learning_rate": 0.00013970276008492568, | |
| "loss": 0.3715, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "grad_norm": 2.5956978797912598, | |
| "learning_rate": 0.00013927813163481953, | |
| "loss": 0.1763, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "grad_norm": 3.058171272277832, | |
| "learning_rate": 0.00013885350318471339, | |
| "loss": 0.2033, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "grad_norm": 4.358069896697998, | |
| "learning_rate": 0.00013842887473460721, | |
| "loss": 0.1918, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "grad_norm": 3.4236209392547607, | |
| "learning_rate": 0.00013800424628450107, | |
| "loss": 0.2298, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "grad_norm": 2.0080718994140625, | |
| "learning_rate": 0.0001375796178343949, | |
| "loss": 0.2217, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "grad_norm": 1.7190344333648682, | |
| "learning_rate": 0.00013715498938428875, | |
| "loss": 0.1858, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "grad_norm": 1.8413598537445068, | |
| "learning_rate": 0.00013673036093418258, | |
| "loss": 0.2372, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "grad_norm": 6.0616865158081055, | |
| "learning_rate": 0.00013630573248407644, | |
| "loss": 0.2814, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "eval_accuracy": 0.72, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 71, | |
| 3, | |
| 7, | |
| 5, | |
| 2, | |
| 1, | |
| 1, | |
| 6, | |
| 3, | |
| 1 | |
| ], | |
| [ | |
| 53, | |
| 111, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 5, | |
| 5, | |
| 17, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 48, | |
| 4, | |
| 1, | |
| 4, | |
| 0, | |
| 4, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 6, | |
| 0, | |
| 4, | |
| 31, | |
| 6, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 5 | |
| ], | |
| [ | |
| 0, | |
| 3, | |
| 1, | |
| 4, | |
| 82, | |
| 0, | |
| 1, | |
| 1, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 8, | |
| 0, | |
| 4, | |
| 4, | |
| 1, | |
| 49, | |
| 1, | |
| 4, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 1, | |
| 1, | |
| 1, | |
| 0, | |
| 52, | |
| 2, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 80, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 11, | |
| 35, | |
| 2, | |
| 0, | |
| 0, | |
| 1, | |
| 9, | |
| 5, | |
| 117, | |
| 0 | |
| ], | |
| [ | |
| 10, | |
| 1, | |
| 1, | |
| 21, | |
| 1, | |
| 0, | |
| 2, | |
| 2, | |
| 0, | |
| 115 | |
| ] | |
| ], | |
| "eval_loss": 1.1163132190704346, | |
| "eval_runtime": 15.5932, | |
| "eval_samples_per_second": 67.337, | |
| "eval_steps_per_second": 4.233, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "grad_norm": 3.689554214477539, | |
| "learning_rate": 0.00013588110403397026, | |
| "loss": 0.179, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "grad_norm": 2.459721803665161, | |
| "learning_rate": 0.00013545647558386412, | |
| "loss": 0.1896, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "grad_norm": 2.693918228149414, | |
| "learning_rate": 0.00013503184713375797, | |
| "loss": 0.1982, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "grad_norm": 2.6597607135772705, | |
| "learning_rate": 0.0001346072186836518, | |
| "loss": 0.2241, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "grad_norm": 4.53094482421875, | |
| "learning_rate": 0.00013418259023354566, | |
| "loss": 0.2084, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "grad_norm": 0.6765243411064148, | |
| "learning_rate": 0.00013375796178343948, | |
| "loss": 0.1917, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 2.6149380207061768, | |
| "learning_rate": 0.00013333333333333334, | |
| "loss": 0.2102, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "grad_norm": 2.5928609371185303, | |
| "learning_rate": 0.00013290870488322717, | |
| "loss": 0.239, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "grad_norm": 4.47957181930542, | |
| "learning_rate": 0.00013248407643312102, | |
| "loss": 0.2067, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "grad_norm": 3.5347230434417725, | |
| "learning_rate": 0.00013205944798301485, | |
| "loss": 0.2648, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "eval_accuracy": 0.7057142857142857, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 61, | |
| 6, | |
| 3, | |
| 3, | |
| 0, | |
| 3, | |
| 2, | |
| 12, | |
| 8, | |
| 2 | |
| ], | |
| [ | |
| 27, | |
| 131, | |
| 0, | |
| 0, | |
| 4, | |
| 0, | |
| 1, | |
| 9, | |
| 21, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 2, | |
| 51, | |
| 2, | |
| 2, | |
| 4, | |
| 0, | |
| 1, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 3, | |
| 1, | |
| 4, | |
| 28, | |
| 6, | |
| 3, | |
| 0, | |
| 3, | |
| 1, | |
| 3 | |
| ], | |
| [ | |
| 1, | |
| 3, | |
| 0, | |
| 4, | |
| 82, | |
| 1, | |
| 0, | |
| 1, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 8, | |
| 4, | |
| 3, | |
| 51, | |
| 0, | |
| 3, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 3, | |
| 1, | |
| 1, | |
| 1, | |
| 1, | |
| 0, | |
| 51, | |
| 1, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 2, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 79, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 9, | |
| 31, | |
| 1, | |
| 0, | |
| 0, | |
| 1, | |
| 11, | |
| 10, | |
| 117, | |
| 0 | |
| ], | |
| [ | |
| 17, | |
| 5, | |
| 4, | |
| 28, | |
| 2, | |
| 2, | |
| 1, | |
| 4, | |
| 0, | |
| 90 | |
| ] | |
| ], | |
| "eval_loss": 1.1720563173294067, | |
| "eval_runtime": 15.166, | |
| "eval_samples_per_second": 69.234, | |
| "eval_steps_per_second": 4.352, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "grad_norm": 2.3316214084625244, | |
| "learning_rate": 0.0001316348195329087, | |
| "loss": 0.1836, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "grad_norm": 1.4129242897033691, | |
| "learning_rate": 0.00013121019108280253, | |
| "loss": 0.2966, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "grad_norm": 3.569037437438965, | |
| "learning_rate": 0.0001307855626326964, | |
| "loss": 0.2079, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "grad_norm": 3.183149576187134, | |
| "learning_rate": 0.00013036093418259024, | |
| "loss": 0.2142, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "grad_norm": 1.4530551433563232, | |
| "learning_rate": 0.00012993630573248407, | |
| "loss": 0.1636, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "grad_norm": 1.5123274326324463, | |
| "learning_rate": 0.00012951167728237793, | |
| "loss": 0.1772, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "grad_norm": 3.7511801719665527, | |
| "learning_rate": 0.00012908704883227176, | |
| "loss": 0.2243, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "grad_norm": 2.2168476581573486, | |
| "learning_rate": 0.0001286624203821656, | |
| "loss": 0.2089, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "grad_norm": 0.5811095237731934, | |
| "learning_rate": 0.00012823779193205944, | |
| "loss": 0.1868, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "grad_norm": 2.5435843467712402, | |
| "learning_rate": 0.0001278131634819533, | |
| "loss": 0.1857, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 57, | |
| 9, | |
| 5, | |
| 2, | |
| 1, | |
| 0, | |
| 4, | |
| 7, | |
| 11, | |
| 4 | |
| ], | |
| [ | |
| 22, | |
| 131, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 5, | |
| 4, | |
| 30, | |
| 1 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 56, | |
| 1, | |
| 1, | |
| 2, | |
| 1, | |
| 0, | |
| 0, | |
| 3 | |
| ], | |
| [ | |
| 3, | |
| 1, | |
| 2, | |
| 28, | |
| 8, | |
| 1, | |
| 1, | |
| 1, | |
| 1, | |
| 6 | |
| ], | |
| [ | |
| 1, | |
| 1, | |
| 0, | |
| 3, | |
| 85, | |
| 0, | |
| 0, | |
| 0, | |
| 3, | |
| 0 | |
| ], | |
| [ | |
| 6, | |
| 2, | |
| 11, | |
| 4, | |
| 3, | |
| 36, | |
| 2, | |
| 5, | |
| 1, | |
| 2 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 58, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 2, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 80, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 7, | |
| 32, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 19, | |
| 3, | |
| 117, | |
| 1 | |
| ], | |
| [ | |
| 6, | |
| 0, | |
| 1, | |
| 3, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 141 | |
| ] | |
| ], | |
| "eval_loss": 1.0403919219970703, | |
| "eval_runtime": 15.5401, | |
| "eval_samples_per_second": 67.567, | |
| "eval_steps_per_second": 4.247, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 5.45, | |
| "grad_norm": 2.8231258392333984, | |
| "learning_rate": 0.00012738853503184712, | |
| "loss": 0.1562, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "grad_norm": 3.1249213218688965, | |
| "learning_rate": 0.00012696390658174098, | |
| "loss": 0.1543, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "grad_norm": 0.47615599632263184, | |
| "learning_rate": 0.00012653927813163483, | |
| "loss": 0.1578, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "grad_norm": 0.8500877618789673, | |
| "learning_rate": 0.00012611464968152866, | |
| "loss": 0.1791, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "grad_norm": 3.530510902404785, | |
| "learning_rate": 0.00012569002123142252, | |
| "loss": 0.1701, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "grad_norm": 3.1433258056640625, | |
| "learning_rate": 0.00012526539278131634, | |
| "loss": 0.0973, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "grad_norm": 2.9921345710754395, | |
| "learning_rate": 0.0001248407643312102, | |
| "loss": 0.2193, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "grad_norm": 3.721479892730713, | |
| "learning_rate": 0.00012441613588110403, | |
| "loss": 0.265, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "grad_norm": 0.5069979429244995, | |
| "learning_rate": 0.00012399150743099788, | |
| "loss": 0.2341, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "grad_norm": 0.4565420150756836, | |
| "learning_rate": 0.0001235668789808917, | |
| "loss": 0.1958, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "eval_accuracy": 0.7238095238095238, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 53, | |
| 7, | |
| 4, | |
| 3, | |
| 2, | |
| 1, | |
| 6, | |
| 9, | |
| 13, | |
| 2 | |
| ], | |
| [ | |
| 16, | |
| 134, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 10, | |
| 7, | |
| 25, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 54, | |
| 2, | |
| 1, | |
| 1, | |
| 1, | |
| 0, | |
| 0, | |
| 3 | |
| ], | |
| [ | |
| 6, | |
| 0, | |
| 3, | |
| 29, | |
| 8, | |
| 0, | |
| 1, | |
| 1, | |
| 1, | |
| 3 | |
| ], | |
| [ | |
| 0, | |
| 2, | |
| 0, | |
| 2, | |
| 85, | |
| 0, | |
| 2, | |
| 0, | |
| 2, | |
| 0 | |
| ], | |
| [ | |
| 7, | |
| 1, | |
| 9, | |
| 2, | |
| 4, | |
| 43, | |
| 2, | |
| 2, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 57, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 4, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 77, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 4, | |
| 28, | |
| 2, | |
| 0, | |
| 0, | |
| 0, | |
| 19, | |
| 4, | |
| 123, | |
| 0 | |
| ], | |
| [ | |
| 7, | |
| 0, | |
| 2, | |
| 25, | |
| 1, | |
| 0, | |
| 11, | |
| 2, | |
| 0, | |
| 105 | |
| ] | |
| ], | |
| "eval_loss": 1.1391521692276, | |
| "eval_runtime": 15.287, | |
| "eval_samples_per_second": 68.686, | |
| "eval_steps_per_second": 4.317, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "grad_norm": 1.6156843900680542, | |
| "learning_rate": 0.00012314225053078556, | |
| "loss": 0.1635, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "grad_norm": 2.8691422939300537, | |
| "learning_rate": 0.00012271762208067942, | |
| "loss": 0.2309, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "grad_norm": 3.7324392795562744, | |
| "learning_rate": 0.00012229299363057325, | |
| "loss": 0.1626, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "grad_norm": 3.8623502254486084, | |
| "learning_rate": 0.00012186836518046709, | |
| "loss": 0.1487, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "grad_norm": 4.658466815948486, | |
| "learning_rate": 0.00012144373673036093, | |
| "loss": 0.3255, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "grad_norm": 2.3121321201324463, | |
| "learning_rate": 0.00012101910828025477, | |
| "loss": 0.2092, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "grad_norm": 1.5467556715011597, | |
| "learning_rate": 0.00012059447983014863, | |
| "loss": 0.175, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "grad_norm": 3.587686777114868, | |
| "learning_rate": 0.00012016985138004247, | |
| "loss": 0.1644, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "grad_norm": 3.9866294860839844, | |
| "learning_rate": 0.00011974522292993631, | |
| "loss": 0.1916, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "grad_norm": 0.14543366432189941, | |
| "learning_rate": 0.00011932059447983015, | |
| "loss": 0.1475, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "eval_accuracy": 0.7238095238095238, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 72, | |
| 6, | |
| 4, | |
| 4, | |
| 0, | |
| 1, | |
| 2, | |
| 0, | |
| 8, | |
| 3 | |
| ], | |
| [ | |
| 52, | |
| 97, | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 41, | |
| 1 | |
| ], | |
| [ | |
| 3, | |
| 1, | |
| 52, | |
| 3, | |
| 1, | |
| 1, | |
| 1, | |
| 0, | |
| 0, | |
| 3 | |
| ], | |
| [ | |
| 6, | |
| 1, | |
| 3, | |
| 32, | |
| 2, | |
| 1, | |
| 0, | |
| 1, | |
| 1, | |
| 5 | |
| ], | |
| [ | |
| 0, | |
| 4, | |
| 1, | |
| 3, | |
| 79, | |
| 1, | |
| 0, | |
| 0, | |
| 3, | |
| 2 | |
| ], | |
| [ | |
| 3, | |
| 2, | |
| 12, | |
| 6, | |
| 0, | |
| 43, | |
| 0, | |
| 1, | |
| 2, | |
| 3 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 52, | |
| 0, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 8, | |
| 0, | |
| 7, | |
| 2, | |
| 0, | |
| 0, | |
| 0, | |
| 66, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 13, | |
| 26, | |
| 1, | |
| 1, | |
| 0, | |
| 1, | |
| 8, | |
| 1, | |
| 129, | |
| 0 | |
| ], | |
| [ | |
| 6, | |
| 0, | |
| 0, | |
| 7, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 138 | |
| ] | |
| ], | |
| "eval_loss": 1.1925528049468994, | |
| "eval_runtime": 15.2575, | |
| "eval_samples_per_second": 68.819, | |
| "eval_steps_per_second": 4.326, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "grad_norm": 3.591832160949707, | |
| "learning_rate": 0.000118895966029724, | |
| "loss": 0.1943, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "grad_norm": 1.7828431129455566, | |
| "learning_rate": 0.00011847133757961784, | |
| "loss": 0.1596, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "grad_norm": 3.0881214141845703, | |
| "learning_rate": 0.00011804670912951168, | |
| "loss": 0.2402, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "grad_norm": 1.4464250802993774, | |
| "learning_rate": 0.00011762208067940552, | |
| "loss": 0.1438, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "grad_norm": 2.3067240715026855, | |
| "learning_rate": 0.00011719745222929936, | |
| "loss": 0.194, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "grad_norm": 1.2956265211105347, | |
| "learning_rate": 0.0001167728237791932, | |
| "loss": 0.2229, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "grad_norm": 2.0302717685699463, | |
| "learning_rate": 0.00011634819532908706, | |
| "loss": 0.1599, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "grad_norm": 3.4628517627716064, | |
| "learning_rate": 0.0001159235668789809, | |
| "loss": 0.163, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "grad_norm": 4.217594146728516, | |
| "learning_rate": 0.00011549893842887474, | |
| "loss": 0.1439, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "grad_norm": 2.3878486156463623, | |
| "learning_rate": 0.00011507430997876858, | |
| "loss": 0.1443, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "eval_accuracy": 0.7152380952380952, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 64, | |
| 3, | |
| 18, | |
| 4, | |
| 1, | |
| 3, | |
| 1, | |
| 3, | |
| 2, | |
| 1 | |
| ], | |
| [ | |
| 26, | |
| 112, | |
| 1, | |
| 0, | |
| 2, | |
| 0, | |
| 5, | |
| 6, | |
| 41, | |
| 0 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 54, | |
| 2, | |
| 1, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 3 | |
| ], | |
| [ | |
| 7, | |
| 1, | |
| 3, | |
| 34, | |
| 3, | |
| 1, | |
| 0, | |
| 1, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 3, | |
| 0, | |
| 3, | |
| 82, | |
| 0, | |
| 2, | |
| 0, | |
| 3, | |
| 0 | |
| ], | |
| [ | |
| 5, | |
| 2, | |
| 11, | |
| 5, | |
| 1, | |
| 44, | |
| 1, | |
| 1, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 57, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 7, | |
| 0, | |
| 0, | |
| 4, | |
| 0, | |
| 71, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 5, | |
| 23, | |
| 5, | |
| 2, | |
| 0, | |
| 0, | |
| 12, | |
| 5, | |
| 128, | |
| 0 | |
| ], | |
| [ | |
| 6, | |
| 3, | |
| 1, | |
| 36, | |
| 0, | |
| 0, | |
| 0, | |
| 2, | |
| 0, | |
| 105 | |
| ] | |
| ], | |
| "eval_loss": 1.2271040678024292, | |
| "eval_runtime": 15.4096, | |
| "eval_samples_per_second": 68.139, | |
| "eval_steps_per_second": 4.283, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "grad_norm": 2.9746904373168945, | |
| "learning_rate": 0.00011464968152866242, | |
| "loss": 0.1776, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 6.43, | |
| "grad_norm": 4.259703159332275, | |
| "learning_rate": 0.00011422505307855626, | |
| "loss": 0.1744, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "grad_norm": 2.885249137878418, | |
| "learning_rate": 0.0001138004246284501, | |
| "loss": 0.1597, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "grad_norm": 1.6212122440338135, | |
| "learning_rate": 0.00011337579617834395, | |
| "loss": 0.2183, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "grad_norm": 2.243490695953369, | |
| "learning_rate": 0.00011295116772823779, | |
| "loss": 0.205, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "grad_norm": 2.2670421600341797, | |
| "learning_rate": 0.00011252653927813163, | |
| "loss": 0.1432, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "grad_norm": 2.7547404766082764, | |
| "learning_rate": 0.00011210191082802549, | |
| "loss": 0.1276, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "grad_norm": 2.6618921756744385, | |
| "learning_rate": 0.00011167728237791933, | |
| "loss": 0.1829, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "grad_norm": 4.215251922607422, | |
| "learning_rate": 0.00011125265392781317, | |
| "loss": 0.1323, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "grad_norm": 3.4978456497192383, | |
| "learning_rate": 0.00011082802547770701, | |
| "loss": 0.1453, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "eval_accuracy": 0.7390476190476191, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 71, | |
| 4, | |
| 11, | |
| 3, | |
| 0, | |
| 4, | |
| 1, | |
| 3, | |
| 2, | |
| 1 | |
| ], | |
| [ | |
| 26, | |
| 127, | |
| 3, | |
| 0, | |
| 0, | |
| 2, | |
| 4, | |
| 4, | |
| 27, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 53, | |
| 2, | |
| 2, | |
| 4, | |
| 1, | |
| 0, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 5, | |
| 2, | |
| 5, | |
| 27, | |
| 6, | |
| 2, | |
| 0, | |
| 1, | |
| 1, | |
| 3 | |
| ], | |
| [ | |
| 1, | |
| 1, | |
| 0, | |
| 1, | |
| 87, | |
| 1, | |
| 1, | |
| 0, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 6, | |
| 1, | |
| 2, | |
| 58, | |
| 0, | |
| 1, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 3, | |
| 1, | |
| 0, | |
| 2, | |
| 0, | |
| 50, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 7, | |
| 0, | |
| 0, | |
| 4, | |
| 0, | |
| 68, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 4, | |
| 34, | |
| 4, | |
| 0, | |
| 1, | |
| 3, | |
| 14, | |
| 1, | |
| 119, | |
| 0 | |
| ], | |
| [ | |
| 9, | |
| 1, | |
| 2, | |
| 18, | |
| 2, | |
| 1, | |
| 0, | |
| 3, | |
| 1, | |
| 116 | |
| ] | |
| ], | |
| "eval_loss": 1.0546270608901978, | |
| "eval_runtime": 15.5179, | |
| "eval_samples_per_second": 67.664, | |
| "eval_steps_per_second": 4.253, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "grad_norm": 1.68947172164917, | |
| "learning_rate": 0.00011040339702760085, | |
| "loss": 0.1613, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "grad_norm": 3.1171562671661377, | |
| "learning_rate": 0.0001099787685774947, | |
| "loss": 0.176, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "grad_norm": 2.0694828033447266, | |
| "learning_rate": 0.00010955414012738854, | |
| "loss": 0.1529, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "grad_norm": 1.485595703125, | |
| "learning_rate": 0.00010912951167728238, | |
| "loss": 0.1829, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "grad_norm": 2.217650890350342, | |
| "learning_rate": 0.00010870488322717622, | |
| "loss": 0.2196, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "grad_norm": 1.3161414861679077, | |
| "learning_rate": 0.00010828025477707007, | |
| "loss": 0.1002, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "grad_norm": 3.4168200492858887, | |
| "learning_rate": 0.00010785562632696392, | |
| "loss": 0.1856, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "grad_norm": 2.1230216026306152, | |
| "learning_rate": 0.00010743099787685776, | |
| "loss": 0.1529, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "grad_norm": 2.512233018875122, | |
| "learning_rate": 0.0001070063694267516, | |
| "loss": 0.2026, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "grad_norm": 2.9873499870300293, | |
| "learning_rate": 0.00010658174097664544, | |
| "loss": 0.2319, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "eval_accuracy": 0.7371428571428571, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 60, | |
| 4, | |
| 9, | |
| 7, | |
| 1, | |
| 4, | |
| 2, | |
| 2, | |
| 10, | |
| 1 | |
| ], | |
| [ | |
| 18, | |
| 127, | |
| 1, | |
| 0, | |
| 2, | |
| 0, | |
| 9, | |
| 2, | |
| 34, | |
| 0 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 53, | |
| 3, | |
| 1, | |
| 2, | |
| 0, | |
| 1, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 4, | |
| 2, | |
| 2, | |
| 36, | |
| 6, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 4, | |
| 0, | |
| 3, | |
| 83, | |
| 0, | |
| 1, | |
| 1, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 2, | |
| 9, | |
| 6, | |
| 1, | |
| 49, | |
| 1, | |
| 0, | |
| 2, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 57, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 10, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 71, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 5, | |
| 24, | |
| 4, | |
| 0, | |
| 0, | |
| 1, | |
| 15, | |
| 1, | |
| 130, | |
| 0 | |
| ], | |
| [ | |
| 4, | |
| 4, | |
| 2, | |
| 28, | |
| 0, | |
| 0, | |
| 2, | |
| 5, | |
| 0, | |
| 108 | |
| ] | |
| ], | |
| "eval_loss": 1.0889815092086792, | |
| "eval_runtime": 15.3147, | |
| "eval_samples_per_second": 68.562, | |
| "eval_steps_per_second": 4.31, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "grad_norm": 2.2253472805023193, | |
| "learning_rate": 0.00010615711252653928, | |
| "loss": 0.162, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "grad_norm": 2.6832664012908936, | |
| "learning_rate": 0.00010573248407643312, | |
| "loss": 0.1435, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "grad_norm": 2.4758527278900146, | |
| "learning_rate": 0.00010530785562632696, | |
| "loss": 0.1725, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "grad_norm": 2.8945116996765137, | |
| "learning_rate": 0.0001048832271762208, | |
| "loss": 0.1816, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "grad_norm": 1.7378908395767212, | |
| "learning_rate": 0.00010445859872611465, | |
| "loss": 0.1915, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "grad_norm": 3.76171612739563, | |
| "learning_rate": 0.0001040339702760085, | |
| "loss": 0.1212, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "grad_norm": 2.1323678493499756, | |
| "learning_rate": 0.00010360934182590234, | |
| "loss": 0.1488, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "grad_norm": 3.7445242404937744, | |
| "learning_rate": 0.00010318471337579619, | |
| "loss": 0.1953, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "grad_norm": 2.166163682937622, | |
| "learning_rate": 0.00010276008492569003, | |
| "loss": 0.1469, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "grad_norm": 3.0045981407165527, | |
| "learning_rate": 0.00010233545647558387, | |
| "loss": 0.1499, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "eval_accuracy": 0.7, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 68, | |
| 3, | |
| 3, | |
| 11, | |
| 1, | |
| 1, | |
| 1, | |
| 4, | |
| 7, | |
| 1 | |
| ], | |
| [ | |
| 60, | |
| 82, | |
| 0, | |
| 2, | |
| 6, | |
| 0, | |
| 6, | |
| 4, | |
| 31, | |
| 2 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 43, | |
| 6, | |
| 2, | |
| 4, | |
| 2, | |
| 2, | |
| 0, | |
| 3 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 2, | |
| 36, | |
| 6, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 4 | |
| ], | |
| [ | |
| 1, | |
| 2, | |
| 1, | |
| 2, | |
| 83, | |
| 2, | |
| 0, | |
| 1, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 3, | |
| 11, | |
| 2, | |
| 46, | |
| 0, | |
| 3, | |
| 2, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 54, | |
| 1, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 76, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 9, | |
| 22, | |
| 0, | |
| 1, | |
| 2, | |
| 0, | |
| 16, | |
| 2, | |
| 127, | |
| 1 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 1, | |
| 27, | |
| 0, | |
| 0, | |
| 0, | |
| 4, | |
| 0, | |
| 120 | |
| ] | |
| ], | |
| "eval_loss": 1.3652293682098389, | |
| "eval_runtime": 15.3818, | |
| "eval_samples_per_second": 68.262, | |
| "eval_steps_per_second": 4.291, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "grad_norm": 2.051771879196167, | |
| "learning_rate": 0.00010191082802547771, | |
| "loss": 0.2339, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "grad_norm": 2.841862201690674, | |
| "learning_rate": 0.00010148619957537155, | |
| "loss": 0.2014, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "grad_norm": 5.293687343597412, | |
| "learning_rate": 0.0001010615711252654, | |
| "loss": 0.1718, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "grad_norm": 0.5550873279571533, | |
| "learning_rate": 0.00010063694267515924, | |
| "loss": 0.142, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "grad_norm": 4.287944316864014, | |
| "learning_rate": 0.00010021231422505308, | |
| "loss": 0.1815, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "grad_norm": 1.0735574960708618, | |
| "learning_rate": 9.978768577494693e-05, | |
| "loss": 0.1861, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "grad_norm": 3.090205430984497, | |
| "learning_rate": 9.936305732484077e-05, | |
| "loss": 0.169, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "grad_norm": 2.5464820861816406, | |
| "learning_rate": 9.893842887473462e-05, | |
| "loss": 0.2243, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "grad_norm": 4.008731365203857, | |
| "learning_rate": 9.851380042462846e-05, | |
| "loss": 0.1971, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "grad_norm": 3.4554805755615234, | |
| "learning_rate": 9.80891719745223e-05, | |
| "loss": 0.1467, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "eval_accuracy": 0.6676190476190477, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 59, | |
| 3, | |
| 10, | |
| 7, | |
| 0, | |
| 7, | |
| 1, | |
| 2, | |
| 8, | |
| 3 | |
| ], | |
| [ | |
| 55, | |
| 65, | |
| 1, | |
| 0, | |
| 4, | |
| 4, | |
| 15, | |
| 3, | |
| 46, | |
| 0 | |
| ], | |
| [ | |
| 3, | |
| 1, | |
| 48, | |
| 6, | |
| 2, | |
| 1, | |
| 2, | |
| 0, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 3, | |
| 34, | |
| 5, | |
| 1, | |
| 0, | |
| 0, | |
| 1, | |
| 7 | |
| ], | |
| [ | |
| 0, | |
| 4, | |
| 1, | |
| 2, | |
| 83, | |
| 2, | |
| 0, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 4, | |
| 2, | |
| 12, | |
| 3, | |
| 1, | |
| 46, | |
| 0, | |
| 0, | |
| 2, | |
| 2 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 56, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 5, | |
| 1, | |
| 0, | |
| 2, | |
| 4, | |
| 68, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 9, | |
| 18, | |
| 1, | |
| 0, | |
| 3, | |
| 1, | |
| 17, | |
| 1, | |
| 129, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 3, | |
| 1, | |
| 32, | |
| 0, | |
| 0, | |
| 0, | |
| 2, | |
| 0, | |
| 113 | |
| ] | |
| ], | |
| "eval_loss": 1.4623304605484009, | |
| "eval_runtime": 15.2878, | |
| "eval_samples_per_second": 68.682, | |
| "eval_steps_per_second": 4.317, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "grad_norm": 1.7582330703735352, | |
| "learning_rate": 9.766454352441614e-05, | |
| "loss": 0.1449, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "grad_norm": 1.2631497383117676, | |
| "learning_rate": 9.723991507430998e-05, | |
| "loss": 0.1599, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "grad_norm": 0.9202260971069336, | |
| "learning_rate": 9.681528662420382e-05, | |
| "loss": 0.0866, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "grad_norm": 0.5633217692375183, | |
| "learning_rate": 9.639065817409766e-05, | |
| "loss": 0.2013, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "grad_norm": 0.0941554605960846, | |
| "learning_rate": 9.59660297239915e-05, | |
| "loss": 0.1761, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "grad_norm": 1.5101308822631836, | |
| "learning_rate": 9.554140127388536e-05, | |
| "loss": 0.0721, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 7.87, | |
| "grad_norm": 2.1253440380096436, | |
| "learning_rate": 9.51167728237792e-05, | |
| "loss": 0.1284, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "grad_norm": 2.5228655338287354, | |
| "learning_rate": 9.469214437367304e-05, | |
| "loss": 0.141, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "grad_norm": 1.9860364198684692, | |
| "learning_rate": 9.426751592356689e-05, | |
| "loss": 0.1518, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "grad_norm": 0.04018540680408478, | |
| "learning_rate": 9.384288747346073e-05, | |
| "loss": 0.1163, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "eval_accuracy": 0.6819047619047619, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 53, | |
| 2, | |
| 15, | |
| 7, | |
| 0, | |
| 3, | |
| 6, | |
| 4, | |
| 5, | |
| 5 | |
| ], | |
| [ | |
| 62, | |
| 76, | |
| 2, | |
| 0, | |
| 4, | |
| 2, | |
| 19, | |
| 7, | |
| 16, | |
| 5 | |
| ], | |
| [ | |
| 1, | |
| 1, | |
| 52, | |
| 1, | |
| 1, | |
| 2, | |
| 2, | |
| 2, | |
| 0, | |
| 3 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 5, | |
| 28, | |
| 6, | |
| 2, | |
| 1, | |
| 2, | |
| 0, | |
| 7 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 1, | |
| 2, | |
| 83, | |
| 3, | |
| 1, | |
| 1, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 13, | |
| 3, | |
| 0, | |
| 44, | |
| 1, | |
| 4, | |
| 0, | |
| 4 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 57, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 5, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 75, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 11, | |
| 17, | |
| 1, | |
| 0, | |
| 1, | |
| 1, | |
| 28, | |
| 3, | |
| 116, | |
| 2 | |
| ], | |
| [ | |
| 0, | |
| 3, | |
| 2, | |
| 10, | |
| 0, | |
| 1, | |
| 3, | |
| 2, | |
| 0, | |
| 132 | |
| ] | |
| ], | |
| "eval_loss": 1.530079960823059, | |
| "eval_runtime": 15.6324, | |
| "eval_samples_per_second": 67.168, | |
| "eval_steps_per_second": 4.222, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 7.99, | |
| "grad_norm": 2.986940622329712, | |
| "learning_rate": 9.341825902335457e-05, | |
| "loss": 0.1317, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 8.03, | |
| "grad_norm": 2.86224627494812, | |
| "learning_rate": 9.299363057324841e-05, | |
| "loss": 0.133, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "grad_norm": 1.1539607048034668, | |
| "learning_rate": 9.256900212314225e-05, | |
| "loss": 0.1061, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 8.09, | |
| "grad_norm": 1.738471508026123, | |
| "learning_rate": 9.21443736730361e-05, | |
| "loss": 0.0456, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "grad_norm": 4.975551605224609, | |
| "learning_rate": 9.171974522292994e-05, | |
| "loss": 0.1636, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "grad_norm": 2.86753511428833, | |
| "learning_rate": 9.129511677282379e-05, | |
| "loss": 0.2237, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 8.18, | |
| "grad_norm": 2.0240707397460938, | |
| "learning_rate": 9.087048832271763e-05, | |
| "loss": 0.1529, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "grad_norm": 3.7712082862854004, | |
| "learning_rate": 9.044585987261147e-05, | |
| "loss": 0.1449, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 8.25, | |
| "grad_norm": 2.0401346683502197, | |
| "learning_rate": 9.002123142250532e-05, | |
| "loss": 0.1584, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 8.28, | |
| "grad_norm": 1.6185662746429443, | |
| "learning_rate": 8.959660297239916e-05, | |
| "loss": 0.1087, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 8.28, | |
| "eval_accuracy": 0.7323809523809524, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 62, | |
| 6, | |
| 5, | |
| 6, | |
| 0, | |
| 0, | |
| 2, | |
| 2, | |
| 12, | |
| 5 | |
| ], | |
| [ | |
| 32, | |
| 102, | |
| 0, | |
| 0, | |
| 2, | |
| 1, | |
| 12, | |
| 3, | |
| 41, | |
| 0 | |
| ], | |
| [ | |
| 3, | |
| 2, | |
| 45, | |
| 4, | |
| 1, | |
| 3, | |
| 3, | |
| 0, | |
| 0, | |
| 4 | |
| ], | |
| [ | |
| 5, | |
| 0, | |
| 3, | |
| 29, | |
| 3, | |
| 0, | |
| 0, | |
| 0, | |
| 3, | |
| 9 | |
| ], | |
| [ | |
| 1, | |
| 5, | |
| 0, | |
| 4, | |
| 73, | |
| 2, | |
| 1, | |
| 1, | |
| 4, | |
| 2 | |
| ], | |
| [ | |
| 5, | |
| 3, | |
| 3, | |
| 6, | |
| 1, | |
| 43, | |
| 0, | |
| 3, | |
| 3, | |
| 5 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 57, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 5, | |
| 1, | |
| 0, | |
| 0, | |
| 1, | |
| 72, | |
| 3, | |
| 0 | |
| ], | |
| [ | |
| 3, | |
| 21, | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 9, | |
| 1, | |
| 142, | |
| 2 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 1, | |
| 6, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 144 | |
| ] | |
| ], | |
| "eval_loss": 1.2230817079544067, | |
| "eval_runtime": 15.3373, | |
| "eval_samples_per_second": 68.46, | |
| "eval_steps_per_second": 4.303, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "grad_norm": 1.1602622270584106, | |
| "learning_rate": 8.9171974522293e-05, | |
| "loss": 0.0938, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 8.34, | |
| "grad_norm": 3.745410680770874, | |
| "learning_rate": 8.874734607218684e-05, | |
| "loss": 0.1808, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 8.38, | |
| "grad_norm": 7.07642936706543, | |
| "learning_rate": 8.832271762208068e-05, | |
| "loss": 0.2102, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "grad_norm": 2.742769479751587, | |
| "learning_rate": 8.789808917197452e-05, | |
| "loss": 0.1217, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 8.44, | |
| "grad_norm": 3.350234031677246, | |
| "learning_rate": 8.747346072186838e-05, | |
| "loss": 0.1369, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 8.47, | |
| "grad_norm": 2.5689895153045654, | |
| "learning_rate": 8.704883227176222e-05, | |
| "loss": 0.1338, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "grad_norm": 1.8818074464797974, | |
| "learning_rate": 8.662420382165606e-05, | |
| "loss": 0.1802, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 8.54, | |
| "grad_norm": 2.009812831878662, | |
| "learning_rate": 8.61995753715499e-05, | |
| "loss": 0.094, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 8.57, | |
| "grad_norm": 1.86770761013031, | |
| "learning_rate": 8.577494692144374e-05, | |
| "loss": 0.1355, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "grad_norm": 2.059654474258423, | |
| "learning_rate": 8.535031847133759e-05, | |
| "loss": 0.1783, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "eval_accuracy": 0.7390476190476191, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 53, | |
| 5, | |
| 17, | |
| 5, | |
| 0, | |
| 3, | |
| 2, | |
| 4, | |
| 7, | |
| 4 | |
| ], | |
| [ | |
| 23, | |
| 127, | |
| 1, | |
| 0, | |
| 4, | |
| 2, | |
| 2, | |
| 3, | |
| 31, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 56, | |
| 2, | |
| 1, | |
| 2, | |
| 0, | |
| 0, | |
| 0, | |
| 3 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 7, | |
| 34, | |
| 3, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 6 | |
| ], | |
| [ | |
| 1, | |
| 2, | |
| 3, | |
| 6, | |
| 75, | |
| 1, | |
| 1, | |
| 0, | |
| 2, | |
| 2 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 18, | |
| 5, | |
| 1, | |
| 40, | |
| 0, | |
| 2, | |
| 2, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 54, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 9, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 71, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 6, | |
| 27, | |
| 4, | |
| 0, | |
| 0, | |
| 1, | |
| 12, | |
| 0, | |
| 130, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 2, | |
| 2, | |
| 11, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 136 | |
| ] | |
| ], | |
| "eval_loss": 1.1571182012557983, | |
| "eval_runtime": 15.4706, | |
| "eval_samples_per_second": 67.871, | |
| "eval_steps_per_second": 4.266, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 8.63, | |
| "grad_norm": 3.0963752269744873, | |
| "learning_rate": 8.492569002123143e-05, | |
| "loss": 0.1029, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "grad_norm": 0.827233076095581, | |
| "learning_rate": 8.450106157112527e-05, | |
| "loss": 0.0846, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 8.69, | |
| "grad_norm": 1.6526960134506226, | |
| "learning_rate": 8.407643312101911e-05, | |
| "loss": 0.0648, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "grad_norm": 0.8661704063415527, | |
| "learning_rate": 8.365180467091295e-05, | |
| "loss": 0.1123, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 8.76, | |
| "grad_norm": 2.4067697525024414, | |
| "learning_rate": 8.322717622080681e-05, | |
| "loss": 0.1395, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 8.79, | |
| "grad_norm": 2.14558482170105, | |
| "learning_rate": 8.280254777070065e-05, | |
| "loss": 0.098, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 8.82, | |
| "grad_norm": 2.7629847526550293, | |
| "learning_rate": 8.237791932059449e-05, | |
| "loss": 0.1616, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "grad_norm": 0.8335350751876831, | |
| "learning_rate": 8.195329087048833e-05, | |
| "loss": 0.1403, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "grad_norm": 0.8157089352607727, | |
| "learning_rate": 8.152866242038217e-05, | |
| "loss": 0.1176, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "grad_norm": 11.032809257507324, | |
| "learning_rate": 8.110403397027602e-05, | |
| "loss": 0.1733, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "eval_accuracy": 0.719047619047619, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 51, | |
| 5, | |
| 13, | |
| 8, | |
| 0, | |
| 4, | |
| 4, | |
| 5, | |
| 8, | |
| 2 | |
| ], | |
| [ | |
| 29, | |
| 116, | |
| 6, | |
| 0, | |
| 0, | |
| 4, | |
| 10, | |
| 2, | |
| 26, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 49, | |
| 1, | |
| 1, | |
| 8, | |
| 2, | |
| 0, | |
| 0, | |
| 3 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 5, | |
| 34, | |
| 4, | |
| 3, | |
| 0, | |
| 0, | |
| 0, | |
| 6 | |
| ], | |
| [ | |
| 1, | |
| 3, | |
| 4, | |
| 2, | |
| 76, | |
| 4, | |
| 2, | |
| 0, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 8, | |
| 4, | |
| 0, | |
| 52, | |
| 0, | |
| 3, | |
| 2, | |
| 2 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 58, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 9, | |
| 0, | |
| 0, | |
| 2, | |
| 0, | |
| 71, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 3, | |
| 26, | |
| 5, | |
| 0, | |
| 0, | |
| 4, | |
| 19, | |
| 3, | |
| 118, | |
| 2 | |
| ], | |
| [ | |
| 1, | |
| 2, | |
| 2, | |
| 11, | |
| 0, | |
| 0, | |
| 5, | |
| 2, | |
| 0, | |
| 130 | |
| ] | |
| ], | |
| "eval_loss": 1.3043608665466309, | |
| "eval_runtime": 15.138, | |
| "eval_samples_per_second": 69.362, | |
| "eval_steps_per_second": 4.36, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "grad_norm": 3.626187324523926, | |
| "learning_rate": 8.067940552016986e-05, | |
| "loss": 0.1916, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "grad_norm": 3.0848803520202637, | |
| "learning_rate": 8.02547770700637e-05, | |
| "loss": 0.1663, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "grad_norm": 1.9697245359420776, | |
| "learning_rate": 7.983014861995754e-05, | |
| "loss": 0.1136, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "grad_norm": 0.600030243396759, | |
| "learning_rate": 7.940552016985138e-05, | |
| "loss": 0.1415, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 9.08, | |
| "grad_norm": 5.171400547027588, | |
| "learning_rate": 7.898089171974524e-05, | |
| "loss": 0.1881, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 9.11, | |
| "grad_norm": 2.708814859390259, | |
| "learning_rate": 7.855626326963908e-05, | |
| "loss": 0.1517, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 9.14, | |
| "grad_norm": 0.7085170149803162, | |
| "learning_rate": 7.813163481953292e-05, | |
| "loss": 0.0796, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 9.17, | |
| "grad_norm": 2.6039786338806152, | |
| "learning_rate": 7.770700636942676e-05, | |
| "loss": 0.0846, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "grad_norm": 4.559110641479492, | |
| "learning_rate": 7.72823779193206e-05, | |
| "loss": 0.1696, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "grad_norm": 3.545804262161255, | |
| "learning_rate": 7.685774946921445e-05, | |
| "loss": 0.1275, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "eval_accuracy": 0.7266666666666667, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 66, | |
| 6, | |
| 8, | |
| 5, | |
| 0, | |
| 4, | |
| 3, | |
| 3, | |
| 4, | |
| 1 | |
| ], | |
| [ | |
| 53, | |
| 111, | |
| 0, | |
| 0, | |
| 4, | |
| 1, | |
| 4, | |
| 3, | |
| 17, | |
| 0 | |
| ], | |
| [ | |
| 3, | |
| 1, | |
| 48, | |
| 3, | |
| 1, | |
| 5, | |
| 2, | |
| 0, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 5, | |
| 1, | |
| 3, | |
| 27, | |
| 5, | |
| 2, | |
| 0, | |
| 2, | |
| 1, | |
| 6 | |
| ], | |
| [ | |
| 1, | |
| 2, | |
| 0, | |
| 1, | |
| 85, | |
| 0, | |
| 1, | |
| 0, | |
| 2, | |
| 1 | |
| ], | |
| [ | |
| 5, | |
| 0, | |
| 5, | |
| 6, | |
| 0, | |
| 50, | |
| 2, | |
| 3, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 57, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 3, | |
| 0, | |
| 0, | |
| 2, | |
| 1, | |
| 74, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 13, | |
| 34, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 11, | |
| 1, | |
| 119, | |
| 0 | |
| ], | |
| [ | |
| 7, | |
| 0, | |
| 1, | |
| 14, | |
| 0, | |
| 0, | |
| 3, | |
| 2, | |
| 0, | |
| 126 | |
| ] | |
| ], | |
| "eval_loss": 1.2415543794631958, | |
| "eval_runtime": 15.3407, | |
| "eval_samples_per_second": 68.445, | |
| "eval_steps_per_second": 4.302, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "grad_norm": 0.5964432954788208, | |
| "learning_rate": 7.643312101910829e-05, | |
| "loss": 0.0986, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 9.3, | |
| "grad_norm": 1.4419245719909668, | |
| "learning_rate": 7.600849256900213e-05, | |
| "loss": 0.0997, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "grad_norm": 1.5141104459762573, | |
| "learning_rate": 7.558386411889597e-05, | |
| "loss": 0.1625, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "grad_norm": 0.5192936658859253, | |
| "learning_rate": 7.515923566878981e-05, | |
| "loss": 0.1522, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "grad_norm": 2.7084801197052, | |
| "learning_rate": 7.473460721868365e-05, | |
| "loss": 0.1412, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "grad_norm": 3.396594762802124, | |
| "learning_rate": 7.43099787685775e-05, | |
| "loss": 0.123, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 9.46, | |
| "grad_norm": 0.13223083317279816, | |
| "learning_rate": 7.388535031847134e-05, | |
| "loss": 0.1273, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 9.49, | |
| "grad_norm": 1.5881577730178833, | |
| "learning_rate": 7.346072186836518e-05, | |
| "loss": 0.1309, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "grad_norm": 2.229868173599243, | |
| "learning_rate": 7.303609341825902e-05, | |
| "loss": 0.1302, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 9.55, | |
| "grad_norm": 4.873483657836914, | |
| "learning_rate": 7.261146496815286e-05, | |
| "loss": 0.1231, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 9.55, | |
| "eval_accuracy": 0.7123809523809523, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 73, | |
| 3, | |
| 7, | |
| 5, | |
| 0, | |
| 1, | |
| 4, | |
| 2, | |
| 4, | |
| 1 | |
| ], | |
| [ | |
| 84, | |
| 81, | |
| 0, | |
| 0, | |
| 3, | |
| 1, | |
| 3, | |
| 1, | |
| 20, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 51, | |
| 2, | |
| 1, | |
| 5, | |
| 0, | |
| 0, | |
| 0, | |
| 3 | |
| ], | |
| [ | |
| 5, | |
| 0, | |
| 3, | |
| 28, | |
| 6, | |
| 1, | |
| 0, | |
| 0, | |
| 1, | |
| 8 | |
| ], | |
| [ | |
| 1, | |
| 1, | |
| 0, | |
| 1, | |
| 86, | |
| 0, | |
| 1, | |
| 0, | |
| 2, | |
| 1 | |
| ], | |
| [ | |
| 9, | |
| 0, | |
| 6, | |
| 4, | |
| 1, | |
| 46, | |
| 1, | |
| 3, | |
| 2, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 54, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 10, | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 71, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 21, | |
| 23, | |
| 1, | |
| 0, | |
| 0, | |
| 2, | |
| 12, | |
| 2, | |
| 119, | |
| 0 | |
| ], | |
| [ | |
| 7, | |
| 0, | |
| 1, | |
| 4, | |
| 0, | |
| 0, | |
| 0, | |
| 2, | |
| 0, | |
| 139 | |
| ] | |
| ], | |
| "eval_loss": 1.4283809661865234, | |
| "eval_runtime": 15.3421, | |
| "eval_samples_per_second": 68.439, | |
| "eval_steps_per_second": 4.302, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 9.59, | |
| "grad_norm": 1.5915924310684204, | |
| "learning_rate": 7.21868365180467e-05, | |
| "loss": 0.1309, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 9.62, | |
| "grad_norm": 0.040684912353754044, | |
| "learning_rate": 7.176220806794054e-05, | |
| "loss": 0.0861, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 9.65, | |
| "grad_norm": 2.1614465713500977, | |
| "learning_rate": 7.13375796178344e-05, | |
| "loss": 0.1997, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "grad_norm": 2.8283462524414062, | |
| "learning_rate": 7.091295116772824e-05, | |
| "loss": 0.2087, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 9.71, | |
| "grad_norm": 2.3698461055755615, | |
| "learning_rate": 7.048832271762208e-05, | |
| "loss": 0.2046, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 9.75, | |
| "grad_norm": 0.0435897558927536, | |
| "learning_rate": 7.006369426751592e-05, | |
| "loss": 0.1335, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 9.78, | |
| "grad_norm": 0.3226892948150635, | |
| "learning_rate": 6.963906581740977e-05, | |
| "loss": 0.1027, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 9.81, | |
| "grad_norm": 3.1752257347106934, | |
| "learning_rate": 6.921443736730361e-05, | |
| "loss": 0.156, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 9.84, | |
| "grad_norm": 1.864380121231079, | |
| "learning_rate": 6.878980891719745e-05, | |
| "loss": 0.1441, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "grad_norm": 2.183727502822876, | |
| "learning_rate": 6.836518046709129e-05, | |
| "loss": 0.1828, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "eval_accuracy": 0.7523809523809524, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 66, | |
| 2, | |
| 13, | |
| 7, | |
| 0, | |
| 0, | |
| 2, | |
| 1, | |
| 7, | |
| 2 | |
| ], | |
| [ | |
| 38, | |
| 115, | |
| 1, | |
| 0, | |
| 4, | |
| 0, | |
| 4, | |
| 2, | |
| 28, | |
| 1 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 52, | |
| 2, | |
| 1, | |
| 4, | |
| 2, | |
| 0, | |
| 0, | |
| 3 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 4, | |
| 35, | |
| 5, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 4 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 1, | |
| 5, | |
| 83, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 4, | |
| 1, | |
| 12, | |
| 6, | |
| 3, | |
| 41, | |
| 1, | |
| 2, | |
| 2, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 58, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 5, | |
| 0, | |
| 5, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 72, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 11, | |
| 24, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 10, | |
| 1, | |
| 132, | |
| 1 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 2, | |
| 9, | |
| 0, | |
| 0, | |
| 1, | |
| 2, | |
| 0, | |
| 136 | |
| ] | |
| ], | |
| "eval_loss": 1.2049111127853394, | |
| "eval_runtime": 15.2555, | |
| "eval_samples_per_second": 68.828, | |
| "eval_steps_per_second": 4.326, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "grad_norm": 4.488446235656738, | |
| "learning_rate": 6.794055201698513e-05, | |
| "loss": 0.1018, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 9.94, | |
| "grad_norm": 3.176164388656616, | |
| "learning_rate": 6.751592356687899e-05, | |
| "loss": 0.1049, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "grad_norm": 4.369916915893555, | |
| "learning_rate": 6.709129511677283e-05, | |
| "loss": 0.1179, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 2.099869728088379, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 0.1081, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 10.03, | |
| "grad_norm": 4.553194522857666, | |
| "learning_rate": 6.624203821656051e-05, | |
| "loss": 0.1152, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 10.06, | |
| "grad_norm": 0.31203553080558777, | |
| "learning_rate": 6.581740976645435e-05, | |
| "loss": 0.0666, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 10.1, | |
| "grad_norm": 1.5009262561798096, | |
| "learning_rate": 6.53927813163482e-05, | |
| "loss": 0.1208, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 10.13, | |
| "grad_norm": 0.2197587788105011, | |
| "learning_rate": 6.496815286624204e-05, | |
| "loss": 0.0826, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 10.16, | |
| "grad_norm": 0.05214032158255577, | |
| "learning_rate": 6.454352441613588e-05, | |
| "loss": 0.0983, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 10.19, | |
| "grad_norm": 3.6048381328582764, | |
| "learning_rate": 6.411889596602972e-05, | |
| "loss": 0.083, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 10.19, | |
| "eval_accuracy": 0.7238095238095238, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 57, | |
| 5, | |
| 16, | |
| 5, | |
| 1, | |
| 1, | |
| 7, | |
| 2, | |
| 3, | |
| 3 | |
| ], | |
| [ | |
| 30, | |
| 127, | |
| 0, | |
| 0, | |
| 1, | |
| 2, | |
| 11, | |
| 3, | |
| 18, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 52, | |
| 3, | |
| 1, | |
| 5, | |
| 2, | |
| 0, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 5, | |
| 30, | |
| 5, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 7 | |
| ], | |
| [ | |
| 1, | |
| 1, | |
| 0, | |
| 4, | |
| 84, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 3, | |
| 1, | |
| 9, | |
| 4, | |
| 1, | |
| 48, | |
| 2, | |
| 1, | |
| 2, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 57, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 7, | |
| 0, | |
| 0, | |
| 0, | |
| 3, | |
| 69, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 9, | |
| 27, | |
| 1, | |
| 0, | |
| 0, | |
| 1, | |
| 32, | |
| 0, | |
| 109, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 2, | |
| 16, | |
| 0, | |
| 0, | |
| 3, | |
| 2, | |
| 0, | |
| 127 | |
| ] | |
| ], | |
| "eval_loss": 1.2484331130981445, | |
| "eval_runtime": 15.4412, | |
| "eval_samples_per_second": 68.0, | |
| "eval_steps_per_second": 4.274, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 10.22, | |
| "grad_norm": 2.7771308422088623, | |
| "learning_rate": 6.369426751592356e-05, | |
| "loss": 0.1365, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 10.25, | |
| "grad_norm": 0.18859456479549408, | |
| "learning_rate": 6.326963906581742e-05, | |
| "loss": 0.0684, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 10.29, | |
| "grad_norm": 1.620737910270691, | |
| "learning_rate": 6.284501061571126e-05, | |
| "loss": 0.1069, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 10.32, | |
| "grad_norm": 0.8589503765106201, | |
| "learning_rate": 6.24203821656051e-05, | |
| "loss": 0.172, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 10.35, | |
| "grad_norm": 1.9255318641662598, | |
| "learning_rate": 6.199575371549894e-05, | |
| "loss": 0.0901, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 10.38, | |
| "grad_norm": 3.53298282623291, | |
| "learning_rate": 6.157112526539278e-05, | |
| "loss": 0.0516, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 10.41, | |
| "grad_norm": 1.6446423530578613, | |
| "learning_rate": 6.114649681528662e-05, | |
| "loss": 0.15, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 10.45, | |
| "grad_norm": 2.324852466583252, | |
| "learning_rate": 6.0721868365180465e-05, | |
| "loss": 0.1349, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 10.48, | |
| "grad_norm": 1.3299524784088135, | |
| "learning_rate": 6.0297239915074314e-05, | |
| "loss": 0.0912, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 10.51, | |
| "grad_norm": 0.16438178718090057, | |
| "learning_rate": 5.9872611464968155e-05, | |
| "loss": 0.1256, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 10.51, | |
| "eval_accuracy": 0.7228571428571429, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 64, | |
| 4, | |
| 8, | |
| 4, | |
| 1, | |
| 7, | |
| 5, | |
| 2, | |
| 2, | |
| 3 | |
| ], | |
| [ | |
| 43, | |
| 119, | |
| 0, | |
| 0, | |
| 2, | |
| 1, | |
| 10, | |
| 3, | |
| 14, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 49, | |
| 3, | |
| 1, | |
| 7, | |
| 2, | |
| 0, | |
| 0, | |
| 3 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 8, | |
| 27, | |
| 5, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 7 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 2, | |
| 2, | |
| 81, | |
| 3, | |
| 1, | |
| 0, | |
| 1, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 2, | |
| 10, | |
| 3, | |
| 0, | |
| 50, | |
| 1, | |
| 0, | |
| 2, | |
| 2 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 55, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 3, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 77, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 11, | |
| 37, | |
| 0, | |
| 0, | |
| 0, | |
| 3, | |
| 25, | |
| 2, | |
| 102, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 1, | |
| 2, | |
| 11, | |
| 0, | |
| 0, | |
| 1, | |
| 2, | |
| 0, | |
| 135 | |
| ] | |
| ], | |
| "eval_loss": 1.2746260166168213, | |
| "eval_runtime": 15.3791, | |
| "eval_samples_per_second": 68.274, | |
| "eval_steps_per_second": 4.292, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 10.54, | |
| "grad_norm": 1.9269123077392578, | |
| "learning_rate": 5.9447983014862e-05, | |
| "loss": 0.0969, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 10.57, | |
| "grad_norm": 1.9249624013900757, | |
| "learning_rate": 5.902335456475584e-05, | |
| "loss": 0.0927, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 10.61, | |
| "grad_norm": 1.8576029539108276, | |
| "learning_rate": 5.859872611464968e-05, | |
| "loss": 0.0572, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 10.64, | |
| "grad_norm": 3.594461441040039, | |
| "learning_rate": 5.817409766454353e-05, | |
| "loss": 0.0982, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 10.67, | |
| "grad_norm": 1.7123650312423706, | |
| "learning_rate": 5.774946921443737e-05, | |
| "loss": 0.083, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 10.7, | |
| "grad_norm": 0.2372170090675354, | |
| "learning_rate": 5.732484076433121e-05, | |
| "loss": 0.0989, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 10.73, | |
| "grad_norm": 4.383459091186523, | |
| "learning_rate": 5.690021231422505e-05, | |
| "loss": 0.1262, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 10.76, | |
| "grad_norm": 2.293396472930908, | |
| "learning_rate": 5.6475583864118895e-05, | |
| "loss": 0.1413, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 10.8, | |
| "grad_norm": 3.0922889709472656, | |
| "learning_rate": 5.605095541401274e-05, | |
| "loss": 0.1755, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 10.83, | |
| "grad_norm": 3.0807735919952393, | |
| "learning_rate": 5.5626326963906585e-05, | |
| "loss": 0.1067, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 10.83, | |
| "eval_accuracy": 0.7380952380952381, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 55, | |
| 3, | |
| 11, | |
| 9, | |
| 1, | |
| 2, | |
| 7, | |
| 2, | |
| 6, | |
| 4 | |
| ], | |
| [ | |
| 35, | |
| 122, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 6, | |
| 2, | |
| 25, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 50, | |
| 2, | |
| 1, | |
| 3, | |
| 4, | |
| 0, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 4, | |
| 37, | |
| 4, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 5 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 1, | |
| 4, | |
| 82, | |
| 2, | |
| 1, | |
| 0, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 13, | |
| 6, | |
| 0, | |
| 44, | |
| 1, | |
| 0, | |
| 1, | |
| 4 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 56, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 3, | |
| 1, | |
| 0, | |
| 0, | |
| 1, | |
| 76, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 9, | |
| 36, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 20, | |
| 2, | |
| 112, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 2, | |
| 10, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 141 | |
| ] | |
| ], | |
| "eval_loss": 1.190523624420166, | |
| "eval_runtime": 15.9133, | |
| "eval_samples_per_second": 65.982, | |
| "eval_steps_per_second": 4.147, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 10.86, | |
| "grad_norm": 6.171202182769775, | |
| "learning_rate": 5.5201698513800426e-05, | |
| "loss": 0.1262, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 10.89, | |
| "grad_norm": 0.7859517931938171, | |
| "learning_rate": 5.477707006369427e-05, | |
| "loss": 0.1045, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 10.92, | |
| "grad_norm": 2.1993987560272217, | |
| "learning_rate": 5.435244161358811e-05, | |
| "loss": 0.1104, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 10.96, | |
| "grad_norm": 1.4276065826416016, | |
| "learning_rate": 5.392781316348196e-05, | |
| "loss": 0.098, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "grad_norm": 3.335463523864746, | |
| "learning_rate": 5.35031847133758e-05, | |
| "loss": 0.0561, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 11.02, | |
| "grad_norm": 1.2473598718643188, | |
| "learning_rate": 5.307855626326964e-05, | |
| "loss": 0.0946, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 11.05, | |
| "grad_norm": 2.4468166828155518, | |
| "learning_rate": 5.265392781316348e-05, | |
| "loss": 0.1373, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 11.08, | |
| "grad_norm": 4.396722793579102, | |
| "learning_rate": 5.2229299363057324e-05, | |
| "loss": 0.0794, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 11.11, | |
| "grad_norm": 1.3597296476364136, | |
| "learning_rate": 5.180467091295117e-05, | |
| "loss": 0.1506, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 11.15, | |
| "grad_norm": 1.7938604354858398, | |
| "learning_rate": 5.1380042462845014e-05, | |
| "loss": 0.092, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 11.15, | |
| "eval_accuracy": 0.7476190476190476, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 65, | |
| 3, | |
| 8, | |
| 2, | |
| 0, | |
| 4, | |
| 5, | |
| 2, | |
| 9, | |
| 2 | |
| ], | |
| [ | |
| 27, | |
| 108, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 8, | |
| 2, | |
| 46, | |
| 0 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 49, | |
| 2, | |
| 1, | |
| 4, | |
| 3, | |
| 0, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 3, | |
| 37, | |
| 4, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 5 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 1, | |
| 3, | |
| 83, | |
| 2, | |
| 1, | |
| 0, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 6, | |
| 2, | |
| 8, | |
| 6, | |
| 0, | |
| 45, | |
| 1, | |
| 0, | |
| 1, | |
| 3 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 56, | |
| 0, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 2, | |
| 1, | |
| 0, | |
| 1, | |
| 3, | |
| 72, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 10, | |
| 22, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 12, | |
| 1, | |
| 134, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 1, | |
| 12, | |
| 0, | |
| 1, | |
| 2, | |
| 0, | |
| 0, | |
| 136 | |
| ] | |
| ], | |
| "eval_loss": 1.1174523830413818, | |
| "eval_runtime": 15.7765, | |
| "eval_samples_per_second": 66.554, | |
| "eval_steps_per_second": 4.183, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 11.18, | |
| "grad_norm": 0.015160035341978073, | |
| "learning_rate": 5.0955414012738855e-05, | |
| "loss": 0.08, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 11.21, | |
| "grad_norm": 2.8019070625305176, | |
| "learning_rate": 5.05307855626327e-05, | |
| "loss": 0.1266, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 11.24, | |
| "grad_norm": 0.25064635276794434, | |
| "learning_rate": 5.010615711252654e-05, | |
| "loss": 0.1271, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 11.27, | |
| "grad_norm": 2.228379726409912, | |
| "learning_rate": 4.968152866242039e-05, | |
| "loss": 0.0815, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 11.31, | |
| "grad_norm": 1.0253998041152954, | |
| "learning_rate": 4.925690021231423e-05, | |
| "loss": 0.0373, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 11.34, | |
| "grad_norm": 2.2330989837646484, | |
| "learning_rate": 4.883227176220807e-05, | |
| "loss": 0.1606, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 11.37, | |
| "grad_norm": 2.266019821166992, | |
| "learning_rate": 4.840764331210191e-05, | |
| "loss": 0.0989, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 11.4, | |
| "grad_norm": 0.6040272116661072, | |
| "learning_rate": 4.798301486199575e-05, | |
| "loss": 0.0714, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 11.43, | |
| "grad_norm": 2.601849317550659, | |
| "learning_rate": 4.75583864118896e-05, | |
| "loss": 0.1037, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 11.46, | |
| "grad_norm": 2.4073190689086914, | |
| "learning_rate": 4.713375796178344e-05, | |
| "loss": 0.153, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 11.46, | |
| "eval_accuracy": 0.7361904761904762, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 75, | |
| 4, | |
| 9, | |
| 1, | |
| 0, | |
| 1, | |
| 2, | |
| 3, | |
| 4, | |
| 1 | |
| ], | |
| [ | |
| 51, | |
| 111, | |
| 0, | |
| 1, | |
| 1, | |
| 1, | |
| 1, | |
| 3, | |
| 24, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 52, | |
| 2, | |
| 1, | |
| 3, | |
| 0, | |
| 2, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 4, | |
| 1, | |
| 6, | |
| 30, | |
| 4, | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 6 | |
| ], | |
| [ | |
| 1, | |
| 4, | |
| 1, | |
| 4, | |
| 80, | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 6, | |
| 4, | |
| 8, | |
| 6, | |
| 1, | |
| 39, | |
| 0, | |
| 1, | |
| 2, | |
| 5 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 1, | |
| 1, | |
| 1, | |
| 0, | |
| 52, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 80, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 15, | |
| 33, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 11, | |
| 3, | |
| 114, | |
| 2 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 1, | |
| 7, | |
| 2, | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 140 | |
| ] | |
| ], | |
| "eval_loss": 1.243424654006958, | |
| "eval_runtime": 15.7389, | |
| "eval_samples_per_second": 66.714, | |
| "eval_steps_per_second": 4.193, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 11.5, | |
| "grad_norm": 3.7230005264282227, | |
| "learning_rate": 4.6709129511677285e-05, | |
| "loss": 0.0819, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 11.53, | |
| "grad_norm": 2.161355495452881, | |
| "learning_rate": 4.6284501061571126e-05, | |
| "loss": 0.0946, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 11.56, | |
| "grad_norm": 0.5289556384086609, | |
| "learning_rate": 4.585987261146497e-05, | |
| "loss": 0.0634, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 11.59, | |
| "grad_norm": 0.37803375720977783, | |
| "learning_rate": 4.5435244161358816e-05, | |
| "loss": 0.0931, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 11.62, | |
| "grad_norm": 1.356149673461914, | |
| "learning_rate": 4.501061571125266e-05, | |
| "loss": 0.0854, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 11.66, | |
| "grad_norm": 1.575438380241394, | |
| "learning_rate": 4.45859872611465e-05, | |
| "loss": 0.08, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 11.69, | |
| "grad_norm": 1.5214145183563232, | |
| "learning_rate": 4.416135881104034e-05, | |
| "loss": 0.0578, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 11.72, | |
| "grad_norm": 0.6308757662773132, | |
| "learning_rate": 4.373673036093419e-05, | |
| "loss": 0.0849, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 11.75, | |
| "grad_norm": 2.426156520843506, | |
| "learning_rate": 4.331210191082803e-05, | |
| "loss": 0.1025, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 11.78, | |
| "grad_norm": 2.7888565063476562, | |
| "learning_rate": 4.288747346072187e-05, | |
| "loss": 0.1065, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 11.78, | |
| "eval_accuracy": 0.7371428571428571, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 69, | |
| 2, | |
| 10, | |
| 2, | |
| 0, | |
| 2, | |
| 6, | |
| 3, | |
| 5, | |
| 1 | |
| ], | |
| [ | |
| 44, | |
| 109, | |
| 2, | |
| 0, | |
| 0, | |
| 2, | |
| 5, | |
| 3, | |
| 28, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 50, | |
| 2, | |
| 1, | |
| 4, | |
| 2, | |
| 1, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 4, | |
| 32, | |
| 4, | |
| 2, | |
| 0, | |
| 1, | |
| 1, | |
| 4 | |
| ], | |
| [ | |
| 1, | |
| 2, | |
| 2, | |
| 6, | |
| 76, | |
| 2, | |
| 1, | |
| 1, | |
| 2, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 11, | |
| 4, | |
| 0, | |
| 47, | |
| 2, | |
| 1, | |
| 2, | |
| 2 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 58, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 4, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 76, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 9, | |
| 27, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 13, | |
| 1, | |
| 128, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 2, | |
| 2, | |
| 13, | |
| 0, | |
| 0, | |
| 4, | |
| 1, | |
| 0, | |
| 129 | |
| ] | |
| ], | |
| "eval_loss": 1.232663631439209, | |
| "eval_runtime": 15.4779, | |
| "eval_samples_per_second": 67.839, | |
| "eval_steps_per_second": 4.264, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 11.82, | |
| "grad_norm": 0.030465861782431602, | |
| "learning_rate": 4.2462845010615714e-05, | |
| "loss": 0.0873, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 11.85, | |
| "grad_norm": 1.4719470739364624, | |
| "learning_rate": 4.2038216560509556e-05, | |
| "loss": 0.1074, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 11.88, | |
| "grad_norm": 0.23886482417583466, | |
| "learning_rate": 4.1613588110403404e-05, | |
| "loss": 0.0592, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 11.91, | |
| "grad_norm": 2.1231284141540527, | |
| "learning_rate": 4.1188959660297246e-05, | |
| "loss": 0.0642, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 11.94, | |
| "grad_norm": 0.41407281160354614, | |
| "learning_rate": 4.076433121019109e-05, | |
| "loss": 0.1265, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 11.97, | |
| "grad_norm": 3.6939876079559326, | |
| "learning_rate": 4.033970276008493e-05, | |
| "loss": 0.1025, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 12.01, | |
| "grad_norm": 0.5653472542762756, | |
| "learning_rate": 3.991507430997877e-05, | |
| "loss": 0.111, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 12.04, | |
| "grad_norm": 1.5738434791564941, | |
| "learning_rate": 3.949044585987262e-05, | |
| "loss": 0.1009, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 12.07, | |
| "grad_norm": 0.18574613332748413, | |
| "learning_rate": 3.906581740976646e-05, | |
| "loss": 0.0578, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 12.1, | |
| "grad_norm": 2.1222317218780518, | |
| "learning_rate": 3.86411889596603e-05, | |
| "loss": 0.0875, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 12.1, | |
| "eval_accuracy": 0.7457142857142857, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 67, | |
| 3, | |
| 10, | |
| 5, | |
| 0, | |
| 3, | |
| 1, | |
| 3, | |
| 6, | |
| 2 | |
| ], | |
| [ | |
| 40, | |
| 110, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 4, | |
| 3, | |
| 34, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 1, | |
| 51, | |
| 2, | |
| 1, | |
| 3, | |
| 1, | |
| 2, | |
| 0, | |
| 3 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 4, | |
| 35, | |
| 4, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 4 | |
| ], | |
| [ | |
| 0, | |
| 2, | |
| 1, | |
| 5, | |
| 78, | |
| 3, | |
| 1, | |
| 1, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 1, | |
| 2, | |
| 12, | |
| 4, | |
| 0, | |
| 45, | |
| 2, | |
| 4, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 56, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 2, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 78, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 9, | |
| 26, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 12, | |
| 1, | |
| 129, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 2, | |
| 13, | |
| 0, | |
| 0, | |
| 3, | |
| 1, | |
| 0, | |
| 134 | |
| ] | |
| ], | |
| "eval_loss": 1.2356998920440674, | |
| "eval_runtime": 15.3228, | |
| "eval_samples_per_second": 68.525, | |
| "eval_steps_per_second": 4.307, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 12.13, | |
| "grad_norm": 6.8635478019714355, | |
| "learning_rate": 3.821656050955414e-05, | |
| "loss": 0.0778, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 12.17, | |
| "grad_norm": 3.9780235290527344, | |
| "learning_rate": 3.7791932059447985e-05, | |
| "loss": 0.103, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 12.2, | |
| "grad_norm": 1.2772787809371948, | |
| "learning_rate": 3.7367303609341826e-05, | |
| "loss": 0.0413, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 12.23, | |
| "grad_norm": 2.644360065460205, | |
| "learning_rate": 3.694267515923567e-05, | |
| "loss": 0.0451, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 12.26, | |
| "grad_norm": 3.3227038383483887, | |
| "learning_rate": 3.651804670912951e-05, | |
| "loss": 0.0924, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 12.29, | |
| "grad_norm": 0.4245937764644623, | |
| "learning_rate": 3.609341825902335e-05, | |
| "loss": 0.0422, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 12.32, | |
| "grad_norm": 1.7972066402435303, | |
| "learning_rate": 3.56687898089172e-05, | |
| "loss": 0.0574, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 12.36, | |
| "grad_norm": 4.524425983428955, | |
| "learning_rate": 3.524416135881104e-05, | |
| "loss": 0.0768, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 12.39, | |
| "grad_norm": 3.626488447189331, | |
| "learning_rate": 3.481953290870488e-05, | |
| "loss": 0.081, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 12.42, | |
| "grad_norm": 1.6742682456970215, | |
| "learning_rate": 3.4394904458598724e-05, | |
| "loss": 0.0714, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 12.42, | |
| "eval_accuracy": 0.7304761904761905, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 77, | |
| 3, | |
| 7, | |
| 3, | |
| 0, | |
| 1, | |
| 2, | |
| 2, | |
| 4, | |
| 1 | |
| ], | |
| [ | |
| 58, | |
| 103, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 4, | |
| 1, | |
| 26, | |
| 0 | |
| ], | |
| [ | |
| 4, | |
| 1, | |
| 51, | |
| 2, | |
| 1, | |
| 3, | |
| 1, | |
| 0, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 4, | |
| 33, | |
| 6, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 5 | |
| ], | |
| [ | |
| 3, | |
| 1, | |
| 4, | |
| 3, | |
| 77, | |
| 2, | |
| 1, | |
| 0, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 5, | |
| 2, | |
| 14, | |
| 6, | |
| 0, | |
| 44, | |
| 0, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 53, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 7, | |
| 0, | |
| 7, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 69, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 14, | |
| 28, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 12, | |
| 1, | |
| 124, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 1, | |
| 11, | |
| 0, | |
| 3, | |
| 0, | |
| 0, | |
| 0, | |
| 136 | |
| ] | |
| ], | |
| "eval_loss": 1.2995976209640503, | |
| "eval_runtime": 15.7291, | |
| "eval_samples_per_second": 66.755, | |
| "eval_steps_per_second": 4.196, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 12.45, | |
| "grad_norm": 0.18191276490688324, | |
| "learning_rate": 3.3970276008492566e-05, | |
| "loss": 0.1241, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 12.48, | |
| "grad_norm": 1.7220439910888672, | |
| "learning_rate": 3.3545647558386414e-05, | |
| "loss": 0.117, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 12.52, | |
| "grad_norm": 2.8817391395568848, | |
| "learning_rate": 3.3121019108280256e-05, | |
| "loss": 0.0711, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 12.55, | |
| "grad_norm": 2.5080385208129883, | |
| "learning_rate": 3.26963906581741e-05, | |
| "loss": 0.1241, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 12.58, | |
| "grad_norm": 1.9212385416030884, | |
| "learning_rate": 3.227176220806794e-05, | |
| "loss": 0.057, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 12.61, | |
| "grad_norm": 0.9736161231994629, | |
| "learning_rate": 3.184713375796178e-05, | |
| "loss": 0.0674, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 12.64, | |
| "grad_norm": 3.3597800731658936, | |
| "learning_rate": 3.142250530785563e-05, | |
| "loss": 0.0848, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 12.68, | |
| "grad_norm": 0.008619729429483414, | |
| "learning_rate": 3.099787685774947e-05, | |
| "loss": 0.0811, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 12.71, | |
| "grad_norm": 1.4590424299240112, | |
| "learning_rate": 3.057324840764331e-05, | |
| "loss": 0.0564, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 12.74, | |
| "grad_norm": 0.1266852766275406, | |
| "learning_rate": 3.0148619957537157e-05, | |
| "loss": 0.1433, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 12.74, | |
| "eval_accuracy": 0.7409523809523809, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 74, | |
| 4, | |
| 9, | |
| 3, | |
| 0, | |
| 2, | |
| 1, | |
| 2, | |
| 4, | |
| 1 | |
| ], | |
| [ | |
| 42, | |
| 114, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 3, | |
| 3, | |
| 29, | |
| 0 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 49, | |
| 2, | |
| 1, | |
| 5, | |
| 2, | |
| 0, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 4, | |
| 31, | |
| 6, | |
| 2, | |
| 0, | |
| 0, | |
| 0, | |
| 5 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 2, | |
| 2, | |
| 86, | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 7, | |
| 2, | |
| 9, | |
| 6, | |
| 0, | |
| 46, | |
| 1, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 53, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 5, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 75, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 9, | |
| 34, | |
| 0, | |
| 0, | |
| 1, | |
| 2, | |
| 11, | |
| 1, | |
| 120, | |
| 2 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 1, | |
| 16, | |
| 0, | |
| 1, | |
| 2, | |
| 0, | |
| 0, | |
| 130 | |
| ] | |
| ], | |
| "eval_loss": 1.2166801691055298, | |
| "eval_runtime": 15.2638, | |
| "eval_samples_per_second": 68.79, | |
| "eval_steps_per_second": 4.324, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 12.77, | |
| "grad_norm": 0.010001400485634804, | |
| "learning_rate": 2.9723991507431e-05, | |
| "loss": 0.1069, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 12.8, | |
| "grad_norm": 0.3851306438446045, | |
| "learning_rate": 2.929936305732484e-05, | |
| "loss": 0.0686, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 12.83, | |
| "grad_norm": 2.4934566020965576, | |
| "learning_rate": 2.8874734607218685e-05, | |
| "loss": 0.1435, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 12.87, | |
| "grad_norm": 1.281803846359253, | |
| "learning_rate": 2.8450106157112527e-05, | |
| "loss": 0.0722, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 12.9, | |
| "grad_norm": 0.7258008122444153, | |
| "learning_rate": 2.802547770700637e-05, | |
| "loss": 0.1327, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 12.93, | |
| "grad_norm": 0.012724624946713448, | |
| "learning_rate": 2.7600849256900213e-05, | |
| "loss": 0.0961, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 12.96, | |
| "grad_norm": 3.1563878059387207, | |
| "learning_rate": 2.7176220806794055e-05, | |
| "loss": 0.0766, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 12.99, | |
| "grad_norm": 2.661426544189453, | |
| "learning_rate": 2.67515923566879e-05, | |
| "loss": 0.0778, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 13.03, | |
| "grad_norm": 4.320444107055664, | |
| "learning_rate": 2.632696390658174e-05, | |
| "loss": 0.0692, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 13.06, | |
| "grad_norm": 2.4924447536468506, | |
| "learning_rate": 2.5902335456475586e-05, | |
| "loss": 0.0765, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 13.06, | |
| "eval_accuracy": 0.7380952380952381, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 69, | |
| 4, | |
| 10, | |
| 3, | |
| 0, | |
| 4, | |
| 4, | |
| 2, | |
| 3, | |
| 1 | |
| ], | |
| [ | |
| 48, | |
| 105, | |
| 0, | |
| 0, | |
| 2, | |
| 1, | |
| 6, | |
| 3, | |
| 28, | |
| 0 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 50, | |
| 1, | |
| 1, | |
| 6, | |
| 2, | |
| 0, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 5, | |
| 31, | |
| 6, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 5 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 1, | |
| 4, | |
| 82, | |
| 2, | |
| 1, | |
| 1, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 3, | |
| 2, | |
| 10, | |
| 5, | |
| 0, | |
| 47, | |
| 2, | |
| 2, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 55, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 3, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 78, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 10, | |
| 30, | |
| 0, | |
| 0, | |
| 1, | |
| 2, | |
| 16, | |
| 1, | |
| 118, | |
| 2 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 1, | |
| 8, | |
| 0, | |
| 2, | |
| 0, | |
| 1, | |
| 0, | |
| 140 | |
| ] | |
| ], | |
| "eval_loss": 1.2837210893630981, | |
| "eval_runtime": 15.5402, | |
| "eval_samples_per_second": 67.567, | |
| "eval_steps_per_second": 4.247, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 13.09, | |
| "grad_norm": 2.188967704772949, | |
| "learning_rate": 2.5477707006369428e-05, | |
| "loss": 0.0632, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 13.12, | |
| "grad_norm": 1.1061598062515259, | |
| "learning_rate": 2.505307855626327e-05, | |
| "loss": 0.0618, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 13.15, | |
| "grad_norm": 1.8072304725646973, | |
| "learning_rate": 2.4628450106157114e-05, | |
| "loss": 0.0436, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 13.18, | |
| "grad_norm": 0.3257448971271515, | |
| "learning_rate": 2.4203821656050956e-05, | |
| "loss": 0.0804, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 13.22, | |
| "grad_norm": 0.5817819237709045, | |
| "learning_rate": 2.37791932059448e-05, | |
| "loss": 0.034, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 13.25, | |
| "grad_norm": 2.3578078746795654, | |
| "learning_rate": 2.3354564755838642e-05, | |
| "loss": 0.069, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 13.28, | |
| "grad_norm": 0.23316432535648346, | |
| "learning_rate": 2.2929936305732484e-05, | |
| "loss": 0.0593, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 13.31, | |
| "grad_norm": 1.1364542245864868, | |
| "learning_rate": 2.250530785562633e-05, | |
| "loss": 0.0801, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 13.34, | |
| "grad_norm": 3.3843605518341064, | |
| "learning_rate": 2.208067940552017e-05, | |
| "loss": 0.1013, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 13.38, | |
| "grad_norm": 0.20773310959339142, | |
| "learning_rate": 2.1656050955414015e-05, | |
| "loss": 0.0753, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 13.38, | |
| "eval_accuracy": 0.7371428571428571, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 72, | |
| 4, | |
| 9, | |
| 2, | |
| 0, | |
| 3, | |
| 3, | |
| 2, | |
| 4, | |
| 1 | |
| ], | |
| [ | |
| 46, | |
| 110, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 5, | |
| 3, | |
| 27, | |
| 0 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 51, | |
| 1, | |
| 1, | |
| 6, | |
| 1, | |
| 0, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 5, | |
| 30, | |
| 6, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 6 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 2, | |
| 3, | |
| 80, | |
| 2, | |
| 1, | |
| 1, | |
| 2, | |
| 1 | |
| ], | |
| [ | |
| 7, | |
| 2, | |
| 8, | |
| 4, | |
| 0, | |
| 49, | |
| 0, | |
| 0, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 55, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 4, | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 76, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 9, | |
| 34, | |
| 1, | |
| 0, | |
| 1, | |
| 1, | |
| 12, | |
| 1, | |
| 120, | |
| 1 | |
| ], | |
| [ | |
| 7, | |
| 1, | |
| 1, | |
| 9, | |
| 0, | |
| 3, | |
| 0, | |
| 1, | |
| 0, | |
| 131 | |
| ] | |
| ], | |
| "eval_loss": 1.2866138219833374, | |
| "eval_runtime": 15.4187, | |
| "eval_samples_per_second": 68.099, | |
| "eval_steps_per_second": 4.281, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 13.41, | |
| "grad_norm": 1.7785210609436035, | |
| "learning_rate": 2.1231422505307857e-05, | |
| "loss": 0.0957, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 13.44, | |
| "grad_norm": 4.4371185302734375, | |
| "learning_rate": 2.0806794055201702e-05, | |
| "loss": 0.0679, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 13.47, | |
| "grad_norm": 3.352201223373413, | |
| "learning_rate": 2.0382165605095544e-05, | |
| "loss": 0.0673, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 13.5, | |
| "grad_norm": 1.5773141384124756, | |
| "learning_rate": 1.9957537154989385e-05, | |
| "loss": 0.1248, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 13.54, | |
| "grad_norm": 1.759545922279358, | |
| "learning_rate": 1.953290870488323e-05, | |
| "loss": 0.0741, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 13.57, | |
| "grad_norm": 2.9278829097747803, | |
| "learning_rate": 1.910828025477707e-05, | |
| "loss": 0.0693, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 13.6, | |
| "grad_norm": 0.01691564917564392, | |
| "learning_rate": 1.8683651804670913e-05, | |
| "loss": 0.095, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 13.63, | |
| "grad_norm": 3.3381359577178955, | |
| "learning_rate": 1.8259023354564755e-05, | |
| "loss": 0.0699, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 13.66, | |
| "grad_norm": 0.33229127526283264, | |
| "learning_rate": 1.78343949044586e-05, | |
| "loss": 0.0591, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 13.69, | |
| "grad_norm": 0.5169569849967957, | |
| "learning_rate": 1.740976645435244e-05, | |
| "loss": 0.0766, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 13.69, | |
| "eval_accuracy": 0.7323809523809524, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 68, | |
| 5, | |
| 9, | |
| 5, | |
| 0, | |
| 2, | |
| 3, | |
| 3, | |
| 3, | |
| 2 | |
| ], | |
| [ | |
| 53, | |
| 106, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 6, | |
| 4, | |
| 22, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 54, | |
| 1, | |
| 1, | |
| 4, | |
| 1, | |
| 0, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 5, | |
| 34, | |
| 4, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 5 | |
| ], | |
| [ | |
| 0, | |
| 2, | |
| 1, | |
| 4, | |
| 79, | |
| 2, | |
| 2, | |
| 1, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 4, | |
| 2, | |
| 10, | |
| 4, | |
| 0, | |
| 47, | |
| 1, | |
| 2, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 55, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 3, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 77, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 10, | |
| 37, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 13, | |
| 1, | |
| 115, | |
| 2 | |
| ], | |
| [ | |
| 5, | |
| 0, | |
| 1, | |
| 11, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 134 | |
| ] | |
| ], | |
| "eval_loss": 1.3334447145462036, | |
| "eval_runtime": 15.6561, | |
| "eval_samples_per_second": 67.067, | |
| "eval_steps_per_second": 4.216, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 13.73, | |
| "grad_norm": 3.5023386478424072, | |
| "learning_rate": 1.6985138004246283e-05, | |
| "loss": 0.0602, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 13.76, | |
| "grad_norm": 1.4914608001708984, | |
| "learning_rate": 1.6560509554140128e-05, | |
| "loss": 0.0985, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 13.79, | |
| "grad_norm": 3.574211359024048, | |
| "learning_rate": 1.613588110403397e-05, | |
| "loss": 0.1329, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 13.82, | |
| "grad_norm": 1.6108194589614868, | |
| "learning_rate": 1.5711252653927814e-05, | |
| "loss": 0.0571, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 13.85, | |
| "grad_norm": 1.1470590829849243, | |
| "learning_rate": 1.5286624203821656e-05, | |
| "loss": 0.0615, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 13.89, | |
| "grad_norm": 1.373115062713623, | |
| "learning_rate": 1.48619957537155e-05, | |
| "loss": 0.0951, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 13.92, | |
| "grad_norm": 0.7028450965881348, | |
| "learning_rate": 1.4437367303609342e-05, | |
| "loss": 0.0556, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 13.95, | |
| "grad_norm": 1.0304226875305176, | |
| "learning_rate": 1.4012738853503186e-05, | |
| "loss": 0.0641, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 13.98, | |
| "grad_norm": 2.3109376430511475, | |
| "learning_rate": 1.3588110403397027e-05, | |
| "loss": 0.0375, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 14.01, | |
| "grad_norm": 0.2413637489080429, | |
| "learning_rate": 1.316348195329087e-05, | |
| "loss": 0.0699, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 14.01, | |
| "eval_accuracy": 0.7276190476190476, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 66, | |
| 3, | |
| 8, | |
| 5, | |
| 0, | |
| 2, | |
| 3, | |
| 5, | |
| 6, | |
| 2 | |
| ], | |
| [ | |
| 59, | |
| 94, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 5, | |
| 5, | |
| 28, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 53, | |
| 1, | |
| 1, | |
| 4, | |
| 1, | |
| 1, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 5, | |
| 33, | |
| 5, | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 4 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 2, | |
| 4, | |
| 79, | |
| 2, | |
| 2, | |
| 1, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 3, | |
| 1, | |
| 13, | |
| 5, | |
| 0, | |
| 44, | |
| 1, | |
| 3, | |
| 1, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 56, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 79, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 10, | |
| 27, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 12, | |
| 2, | |
| 125, | |
| 2 | |
| ], | |
| [ | |
| 5, | |
| 0, | |
| 1, | |
| 11, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 135 | |
| ] | |
| ], | |
| "eval_loss": 1.390450119972229, | |
| "eval_runtime": 15.2795, | |
| "eval_samples_per_second": 68.72, | |
| "eval_steps_per_second": 4.32, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 14.04, | |
| "grad_norm": 0.03319675475358963, | |
| "learning_rate": 1.2738853503184714e-05, | |
| "loss": 0.0788, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 14.08, | |
| "grad_norm": 0.9733434319496155, | |
| "learning_rate": 1.2314225053078557e-05, | |
| "loss": 0.1165, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 14.11, | |
| "grad_norm": 0.15556958317756653, | |
| "learning_rate": 1.18895966029724e-05, | |
| "loss": 0.0719, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 14.14, | |
| "grad_norm": 0.46565964818000793, | |
| "learning_rate": 1.1464968152866242e-05, | |
| "loss": 0.0699, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 14.17, | |
| "grad_norm": 0.6882303357124329, | |
| "learning_rate": 1.1040339702760085e-05, | |
| "loss": 0.1229, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 14.2, | |
| "grad_norm": 0.026163959875702858, | |
| "learning_rate": 1.0615711252653929e-05, | |
| "loss": 0.0653, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 14.24, | |
| "grad_norm": 2.7348830699920654, | |
| "learning_rate": 1.0191082802547772e-05, | |
| "loss": 0.0818, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 14.27, | |
| "grad_norm": 0.20675204694271088, | |
| "learning_rate": 9.766454352441615e-06, | |
| "loss": 0.0578, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 14.3, | |
| "grad_norm": 2.2523086071014404, | |
| "learning_rate": 9.341825902335457e-06, | |
| "loss": 0.1141, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 14.33, | |
| "grad_norm": 0.6537735462188721, | |
| "learning_rate": 8.9171974522293e-06, | |
| "loss": 0.1218, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 14.33, | |
| "eval_accuracy": 0.7323809523809524, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 68, | |
| 3, | |
| 8, | |
| 4, | |
| 0, | |
| 1, | |
| 3, | |
| 4, | |
| 7, | |
| 2 | |
| ], | |
| [ | |
| 58, | |
| 92, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 5, | |
| 4, | |
| 32, | |
| 0 | |
| ], | |
| [ | |
| 1, | |
| 0, | |
| 54, | |
| 1, | |
| 1, | |
| 4, | |
| 1, | |
| 0, | |
| 0, | |
| 3 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 5, | |
| 33, | |
| 5, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 5 | |
| ], | |
| [ | |
| 0, | |
| 1, | |
| 2, | |
| 4, | |
| 80, | |
| 3, | |
| 2, | |
| 1, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 1, | |
| 13, | |
| 4, | |
| 0, | |
| 44, | |
| 2, | |
| 3, | |
| 1, | |
| 2 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 56, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 1, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 79, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 9, | |
| 26, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 15, | |
| 1, | |
| 126, | |
| 1 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 1, | |
| 10, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 137 | |
| ] | |
| ], | |
| "eval_loss": 1.3635234832763672, | |
| "eval_runtime": 15.2732, | |
| "eval_samples_per_second": 68.748, | |
| "eval_steps_per_second": 4.321, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 14.36, | |
| "grad_norm": 4.527649402618408, | |
| "learning_rate": 8.492569002123141e-06, | |
| "loss": 0.0613, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 14.39, | |
| "grad_norm": 1.7878096103668213, | |
| "learning_rate": 8.067940552016985e-06, | |
| "loss": 0.0759, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 14.43, | |
| "grad_norm": 3.677371025085449, | |
| "learning_rate": 7.643312101910828e-06, | |
| "loss": 0.0909, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 14.46, | |
| "grad_norm": 0.7390831708908081, | |
| "learning_rate": 7.218683651804671e-06, | |
| "loss": 0.0565, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 14.49, | |
| "grad_norm": 0.6755375862121582, | |
| "learning_rate": 6.794055201698514e-06, | |
| "loss": 0.052, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 14.52, | |
| "grad_norm": 0.20097573101520538, | |
| "learning_rate": 6.369426751592357e-06, | |
| "loss": 0.0881, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 14.55, | |
| "grad_norm": 0.3152300715446472, | |
| "learning_rate": 5.9447983014862e-06, | |
| "loss": 0.0575, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 14.59, | |
| "grad_norm": 3.195133924484253, | |
| "learning_rate": 5.520169851380043e-06, | |
| "loss": 0.1314, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 14.62, | |
| "grad_norm": 3.097222328186035, | |
| "learning_rate": 5.095541401273886e-06, | |
| "loss": 0.078, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 14.65, | |
| "grad_norm": 0.018327347934246063, | |
| "learning_rate": 4.670912951167728e-06, | |
| "loss": 0.0648, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 14.65, | |
| "eval_accuracy": 0.7342857142857143, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 66, | |
| 3, | |
| 9, | |
| 5, | |
| 0, | |
| 2, | |
| 3, | |
| 3, | |
| 7, | |
| 2 | |
| ], | |
| [ | |
| 52, | |
| 95, | |
| 0, | |
| 0, | |
| 2, | |
| 1, | |
| 6, | |
| 4, | |
| 33, | |
| 0 | |
| ], | |
| [ | |
| 3, | |
| 1, | |
| 52, | |
| 1, | |
| 1, | |
| 3, | |
| 2, | |
| 0, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 5, | |
| 33, | |
| 5, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 5 | |
| ], | |
| [ | |
| 0, | |
| 2, | |
| 1, | |
| 3, | |
| 83, | |
| 1, | |
| 2, | |
| 1, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 2, | |
| 13, | |
| 4, | |
| 0, | |
| 44, | |
| 2, | |
| 2, | |
| 1, | |
| 2 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 56, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 2, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 78, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 9, | |
| 26, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 14, | |
| 1, | |
| 127, | |
| 1 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 1, | |
| 10, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 0, | |
| 137 | |
| ] | |
| ], | |
| "eval_loss": 1.3204517364501953, | |
| "eval_runtime": 15.2766, | |
| "eval_samples_per_second": 68.733, | |
| "eval_steps_per_second": 4.32, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 14.68, | |
| "grad_norm": 0.03679550439119339, | |
| "learning_rate": 4.246284501061571e-06, | |
| "loss": 0.0597, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 14.71, | |
| "grad_norm": 3.308004140853882, | |
| "learning_rate": 3.821656050955414e-06, | |
| "loss": 0.0358, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 14.75, | |
| "grad_norm": 3.7965080738067627, | |
| "learning_rate": 3.397027600849257e-06, | |
| "loss": 0.0974, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 14.78, | |
| "grad_norm": 1.77022385597229, | |
| "learning_rate": 2.9723991507431e-06, | |
| "loss": 0.1215, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 14.81, | |
| "grad_norm": 1.951418399810791, | |
| "learning_rate": 2.547770700636943e-06, | |
| "loss": 0.0692, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 14.84, | |
| "grad_norm": 2.4297380447387695, | |
| "learning_rate": 2.1231422505307854e-06, | |
| "loss": 0.0927, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 14.87, | |
| "grad_norm": 0.13824594020843506, | |
| "learning_rate": 1.6985138004246284e-06, | |
| "loss": 0.0704, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 14.9, | |
| "grad_norm": 2.2139551639556885, | |
| "learning_rate": 1.2738853503184715e-06, | |
| "loss": 0.1461, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 14.94, | |
| "grad_norm": 0.4319517910480499, | |
| "learning_rate": 8.492569002123142e-07, | |
| "loss": 0.0511, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 14.97, | |
| "grad_norm": 0.4486578702926636, | |
| "learning_rate": 4.246284501061571e-07, | |
| "loss": 0.0917, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 14.97, | |
| "eval_accuracy": 0.7342857142857143, | |
| "eval_confusion_matrix": [ | |
| [ | |
| 66, | |
| 3, | |
| 9, | |
| 5, | |
| 0, | |
| 2, | |
| 3, | |
| 3, | |
| 7, | |
| 2 | |
| ], | |
| [ | |
| 52, | |
| 98, | |
| 0, | |
| 0, | |
| 2, | |
| 1, | |
| 6, | |
| 4, | |
| 30, | |
| 0 | |
| ], | |
| [ | |
| 3, | |
| 1, | |
| 52, | |
| 1, | |
| 1, | |
| 3, | |
| 2, | |
| 0, | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 4, | |
| 0, | |
| 5, | |
| 33, | |
| 5, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 5 | |
| ], | |
| [ | |
| 0, | |
| 2, | |
| 1, | |
| 3, | |
| 83, | |
| 1, | |
| 2, | |
| 1, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 2, | |
| 2, | |
| 13, | |
| 4, | |
| 0, | |
| 44, | |
| 2, | |
| 2, | |
| 1, | |
| 2 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 56, | |
| 0, | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 2, | |
| 0, | |
| 2, | |
| 0, | |
| 0, | |
| 0, | |
| 1, | |
| 78, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 9, | |
| 28, | |
| 0, | |
| 0, | |
| 1, | |
| 1, | |
| 15, | |
| 1, | |
| 124, | |
| 1 | |
| ], | |
| [ | |
| 3, | |
| 0, | |
| 1, | |
| 10, | |
| 0, | |
| 1, | |
| 0, | |
| 1, | |
| 0, | |
| 137 | |
| ] | |
| ], | |
| "eval_loss": 1.3111604452133179, | |
| "eval_runtime": 15.4688, | |
| "eval_samples_per_second": 67.878, | |
| "eval_steps_per_second": 4.267, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 2.132920980453491, | |
| "learning_rate": 0.0, | |
| "loss": 0.0933, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "step": 4710, | |
| "total_flos": 1.1677717704563528e+19, | |
| "train_loss": 0.19079542797209098, | |
| "train_runtime": 4656.9657, | |
| "train_samples_per_second": 32.323, | |
| "train_steps_per_second": 1.011 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 4710, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 15, | |
| "save_steps": 100, | |
| "total_flos": 1.1677717704563528e+19, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |