| { | |
| "best_metric": 0.993526599442064, | |
| "best_model_checkpoint": "output_dev/v34-deberta_large_v3-f1-warmup-greater-patience-seqlen-240-6epoch/checkpoint-6600", | |
| "epoch": 3.1755542240862793, | |
| "eval_steps": 200, | |
| "global_step": 10600, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.990019960079841e-07, | |
| "loss": 1.9756, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.980039920159682e-07, | |
| "loss": 1.8411, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.4970059880239521e-06, | |
| "loss": 1.5232, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.9960079840319363e-06, | |
| "loss": 1.0963, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.4950099800399203e-06, | |
| "loss": 0.7066, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.9940119760479042e-06, | |
| "loss": 0.5216, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.493013972055888e-06, | |
| "loss": 0.4144, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.992015968063873e-06, | |
| "loss": 0.3314, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.4910179640718566e-06, | |
| "loss": 0.2475, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9900199600798405e-06, | |
| "loss": 0.2203, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.4890219560878245e-06, | |
| "loss": 0.2609, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 5.9880239520958085e-06, | |
| "loss": 0.2257, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 6.4870259481037925e-06, | |
| "loss": 0.1866, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 6.986027944111776e-06, | |
| "loss": 0.144, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.48502994011976e-06, | |
| "loss": 0.1259, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 7.984031936127745e-06, | |
| "loss": 0.1448, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 8.483033932135728e-06, | |
| "loss": 0.1018, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 8.982035928143713e-06, | |
| "loss": 0.1446, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.481037924151696e-06, | |
| "loss": 0.1391, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.980039920159681e-06, | |
| "loss": 0.0954, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_f1": 0.9439515816965873, | |
| "eval_f1_0": 0.9814644883052654, | |
| "eval_f1_1": 0.9883150101831029, | |
| "eval_f1_2": 0.8092399020555978, | |
| "eval_f1_3": 0.9967869262423833, | |
| "eval_loss": 0.07926841825246811, | |
| "eval_runtime": 52.7827, | |
| "eval_samples_per_second": 224.846, | |
| "eval_steps_per_second": 0.89, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.0479041916167664e-05, | |
| "loss": 0.0892, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.0978043912175649e-05, | |
| "loss": 0.0809, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.1477045908183632e-05, | |
| "loss": 0.0819, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.1976047904191617e-05, | |
| "loss": 0.0607, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.24750499001996e-05, | |
| "loss": 0.0641, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.2974051896207585e-05, | |
| "loss": 0.0579, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.347305389221557e-05, | |
| "loss": 0.0686, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.3972055888223553e-05, | |
| "loss": 0.0353, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.4471057884231538e-05, | |
| "loss": 0.0894, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.497005988023952e-05, | |
| "loss": 0.0411, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.5469061876247507e-05, | |
| "loss": 0.0391, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.596806387225549e-05, | |
| "loss": 0.0548, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.6467065868263474e-05, | |
| "loss": 0.0768, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.6966067864271457e-05, | |
| "loss": 0.0373, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.7465069860279443e-05, | |
| "loss": 0.0519, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.7964071856287426e-05, | |
| "loss": 0.0399, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.846307385229541e-05, | |
| "loss": 0.0972, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.8962075848303393e-05, | |
| "loss": 0.0354, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.946107784431138e-05, | |
| "loss": 0.0827, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.9960079840319362e-05, | |
| "loss": 0.0854, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_f1": 0.9588132852653405, | |
| "eval_f1_0": 0.9854538374890487, | |
| "eval_f1_1": 0.9896395970160494, | |
| "eval_f1_2": 0.8628368371448905, | |
| "eval_f1_3": 0.9973228694113733, | |
| "eval_loss": 0.08576956391334534, | |
| "eval_runtime": 53.1054, | |
| "eval_samples_per_second": 223.48, | |
| "eval_steps_per_second": 0.885, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.0459081836327345e-05, | |
| "loss": 0.0434, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.095808383233533e-05, | |
| "loss": 0.0715, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.1457085828343315e-05, | |
| "loss": 0.0385, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.1956087824351298e-05, | |
| "loss": 0.0474, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.245508982035928e-05, | |
| "loss": 0.0997, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.2954091816367264e-05, | |
| "loss": 0.0355, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.345309381237525e-05, | |
| "loss": 0.0515, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.3952095808383234e-05, | |
| "loss": 0.041, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.4451097804391217e-05, | |
| "loss": 0.0433, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.49500998003992e-05, | |
| "loss": 0.0308, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.5449101796407187e-05, | |
| "loss": 0.0449, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.594810379241517e-05, | |
| "loss": 0.0524, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.6447105788423153e-05, | |
| "loss": 0.0551, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.694610778443114e-05, | |
| "loss": 0.0535, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.7445109780439123e-05, | |
| "loss": 0.0539, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.7944111776447106e-05, | |
| "loss": 0.1137, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.8443113772455092e-05, | |
| "loss": 0.0549, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.8942115768463075e-05, | |
| "loss": 0.0349, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.944111776447106e-05, | |
| "loss": 0.0721, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.994011976047904e-05, | |
| "loss": 0.0206, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_f1": 0.9834921014335102, | |
| "eval_f1_0": 0.9949107875077299, | |
| "eval_f1_1": 0.9961045718004788, | |
| "eval_f1_2": 0.9451365879813575, | |
| "eval_f1_3": 0.9978164584444748, | |
| "eval_loss": 0.028582798317074776, | |
| "eval_runtime": 53.2231, | |
| "eval_samples_per_second": 222.986, | |
| "eval_steps_per_second": 0.883, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3.0439121756487028e-05, | |
| "loss": 0.0223, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.0938123752495015e-05, | |
| "loss": 0.1399, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.143712574850299e-05, | |
| "loss": 0.0508, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.193612774451098e-05, | |
| "loss": 0.0129, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.2435129740518964e-05, | |
| "loss": 0.0671, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.293413173652695e-05, | |
| "loss": 0.0246, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.343313373253493e-05, | |
| "loss": 0.0491, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.393213572854291e-05, | |
| "loss": 0.0762, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.4431137724550896e-05, | |
| "loss": 0.0414, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.4930139720558886e-05, | |
| "loss": 0.0695, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.542914171656686e-05, | |
| "loss": 0.0396, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.592814371257485e-05, | |
| "loss": 0.0526, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.6427145708582836e-05, | |
| "loss": 0.0244, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.692614770459082e-05, | |
| "loss": 0.04, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.74251497005988e-05, | |
| "loss": 0.0425, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.7924151696606785e-05, | |
| "loss": 0.0213, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.8423153692614775e-05, | |
| "loss": 0.0424, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.892215568862276e-05, | |
| "loss": 0.0457, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.942115768463074e-05, | |
| "loss": 0.0576, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.9920159680638724e-05, | |
| "loss": 0.1332, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_f1": 0.9738886081580435, | |
| "eval_f1_0": 0.9914198678301208, | |
| "eval_f1_1": 0.9887483743062586, | |
| "eval_f1_2": 0.9229570008505884, | |
| "eval_f1_3": 0.9924291896452061, | |
| "eval_loss": 0.04060590639710426, | |
| "eval_runtime": 53.3557, | |
| "eval_samples_per_second": 222.432, | |
| "eval_steps_per_second": 0.881, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.041916167664671e-05, | |
| "loss": 0.0437, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.091816367265469e-05, | |
| "loss": 0.0643, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.141716566866268e-05, | |
| "loss": 0.0208, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.191616766467066e-05, | |
| "loss": 0.0252, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.241516966067865e-05, | |
| "loss": 0.0494, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.291417165668663e-05, | |
| "loss": 0.0167, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.341317365269461e-05, | |
| "loss": 0.0499, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.3912175648702596e-05, | |
| "loss": 0.0649, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.4411177644710586e-05, | |
| "loss": 0.0572, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.491017964071856e-05, | |
| "loss": 0.0508, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.540918163672655e-05, | |
| "loss": 0.1019, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.590818363273453e-05, | |
| "loss": 0.0529, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.640718562874252e-05, | |
| "loss": 0.0506, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.69061876247505e-05, | |
| "loss": 0.0767, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.7405189620758485e-05, | |
| "loss": 0.0351, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.790419161676647e-05, | |
| "loss": 0.038, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.840319361277446e-05, | |
| "loss": 0.0589, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.8902195608782434e-05, | |
| "loss": 0.0338, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.9401197604790424e-05, | |
| "loss": 0.0593, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.99001996007984e-05, | |
| "loss": 0.0263, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_f1": 0.9540581603427759, | |
| "eval_f1_0": 0.9875224442592623, | |
| "eval_f1_1": 0.996863689344212, | |
| "eval_f1_2": 0.8345400374073041, | |
| "eval_f1_3": 0.9973064703603255, | |
| "eval_loss": 0.06365196406841278, | |
| "eval_runtime": 53.2716, | |
| "eval_samples_per_second": 222.783, | |
| "eval_steps_per_second": 0.882, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.997897613791654e-05, | |
| "loss": 0.0399, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.995269631031221e-05, | |
| "loss": 0.0461, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.9926416482707875e-05, | |
| "loss": 0.0289, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.990013665510355e-05, | |
| "loss": 0.033, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.987385682749922e-05, | |
| "loss": 0.0549, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.9847576999894884e-05, | |
| "loss": 0.0373, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.9821297172290556e-05, | |
| "loss": 0.0359, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.979501734468622e-05, | |
| "loss": 0.1199, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.976873751708189e-05, | |
| "loss": 0.0546, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.9742457689477564e-05, | |
| "loss": 0.0299, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.971617786187323e-05, | |
| "loss": 0.0481, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.96898980342689e-05, | |
| "loss": 0.0205, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.9663618206664566e-05, | |
| "loss": 0.0522, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.963733837906023e-05, | |
| "loss": 0.0449, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.96110585514559e-05, | |
| "loss": 0.0337, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.9584778723851575e-05, | |
| "loss": 0.0085, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.955849889624724e-05, | |
| "loss": 0.1178, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.953221906864291e-05, | |
| "loss": 0.0294, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.9505939241038577e-05, | |
| "loss": 0.0229, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.947965941343425e-05, | |
| "loss": 0.0268, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_f1": 0.9767413552749206, | |
| "eval_f1_0": 0.9923689597585356, | |
| "eval_f1_1": 0.9898508596941199, | |
| "eval_f1_2": 0.9286801598218379, | |
| "eval_f1_3": 0.9960654418251892, | |
| "eval_loss": 0.04390137270092964, | |
| "eval_runtime": 53.2749, | |
| "eval_samples_per_second": 222.769, | |
| "eval_steps_per_second": 0.882, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.945337958582992e-05, | |
| "loss": 0.0236, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.9427099758225585e-05, | |
| "loss": 0.0315, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.940081993062126e-05, | |
| "loss": 0.024, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.937454010301692e-05, | |
| "loss": 0.0675, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.9348260275412594e-05, | |
| "loss": 0.0241, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.9321980447808266e-05, | |
| "loss": 0.0489, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.929570062020393e-05, | |
| "loss": 0.0322, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.92694207925996e-05, | |
| "loss": 0.03, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.9243140964995274e-05, | |
| "loss": 0.0488, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.921686113739094e-05, | |
| "loss": 0.0417, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.919058130978661e-05, | |
| "loss": 0.0283, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.9164301482182276e-05, | |
| "loss": 0.0441, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.913802165457795e-05, | |
| "loss": 0.0279, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.911174182697362e-05, | |
| "loss": 0.1011, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.9085461999369285e-05, | |
| "loss": 0.0374, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.9059182171764956e-05, | |
| "loss": 0.0632, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.903290234416063e-05, | |
| "loss": 0.0235, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.900662251655629e-05, | |
| "loss": 0.0326, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.8980342688951965e-05, | |
| "loss": 0.0333, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.895406286134763e-05, | |
| "loss": 0.0285, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_f1": 0.9798388589684135, | |
| "eval_f1_0": 0.9935598148642307, | |
| "eval_f1_1": 0.9969184361733485, | |
| "eval_f1_2": 0.9313853930948695, | |
| "eval_f1_3": 0.997491791741205, | |
| "eval_loss": 0.03308385610580444, | |
| "eval_runtime": 53.2818, | |
| "eval_samples_per_second": 222.74, | |
| "eval_steps_per_second": 0.882, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.89277830337433e-05, | |
| "loss": 0.078, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.8901503206138974e-05, | |
| "loss": 0.0296, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.887522337853464e-05, | |
| "loss": 0.0189, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.884894355093031e-05, | |
| "loss": 0.0778, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.8822663723325975e-05, | |
| "loss": 0.0461, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.879638389572165e-05, | |
| "loss": 0.0226, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.877010406811732e-05, | |
| "loss": 0.0578, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.8743824240512984e-05, | |
| "loss": 0.0235, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.8717544412908656e-05, | |
| "loss": 0.0191, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.869126458530433e-05, | |
| "loss": 0.0691, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.866498475769999e-05, | |
| "loss": 0.0303, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.8638704930095664e-05, | |
| "loss": 0.0321, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.861242510249133e-05, | |
| "loss": 0.03, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.8586145274887e-05, | |
| "loss": 0.0415, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.855986544728267e-05, | |
| "loss": 0.0396, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.853358561967833e-05, | |
| "loss": 0.0186, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.8507305792074e-05, | |
| "loss": 0.0275, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.8481025964469675e-05, | |
| "loss": 0.0633, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.845474613686534e-05, | |
| "loss": 0.0265, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.842846630926101e-05, | |
| "loss": 0.048, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_f1": 0.96749762324556, | |
| "eval_f1_0": 0.9887758866484124, | |
| "eval_f1_1": 0.9954477523516124, | |
| "eval_f1_2": 0.8874648365802215, | |
| "eval_f1_3": 0.9983020174019935, | |
| "eval_loss": 0.05787326395511627, | |
| "eval_runtime": 53.2266, | |
| "eval_samples_per_second": 222.971, | |
| "eval_steps_per_second": 0.883, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.8402186481656684e-05, | |
| "loss": 0.0518, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.837590665405235e-05, | |
| "loss": 0.0659, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.834962682644802e-05, | |
| "loss": 0.032, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.8323346998843685e-05, | |
| "loss": 0.0248, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.829706717123936e-05, | |
| "loss": 0.0347, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.827078734363503e-05, | |
| "loss": 0.0086, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.8244507516030694e-05, | |
| "loss": 0.0196, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.8218227688426366e-05, | |
| "loss": 0.0692, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.819194786082204e-05, | |
| "loss": 0.0225, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.81656680332177e-05, | |
| "loss": 0.0059, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.8139388205613374e-05, | |
| "loss": 0.0389, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.811310837800904e-05, | |
| "loss": 0.0111, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.808682855040471e-05, | |
| "loss": 0.0143, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.806054872280038e-05, | |
| "loss": 0.0269, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.803426889519605e-05, | |
| "loss": 0.014, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.800798906759172e-05, | |
| "loss": 0.0511, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.7981709239987385e-05, | |
| "loss": 0.0541, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.795542941238306e-05, | |
| "loss": 0.0225, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.792914958477873e-05, | |
| "loss": 0.0277, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.7902869757174393e-05, | |
| "loss": 0.0405, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_f1": 0.9814626692863833, | |
| "eval_f1_0": 0.9941325744122351, | |
| "eval_f1_1": 0.9968333199151974, | |
| "eval_f1_2": 0.936212977156517, | |
| "eval_f1_3": 0.9986718056615835, | |
| "eval_loss": 0.03122573159635067, | |
| "eval_runtime": 53.2789, | |
| "eval_samples_per_second": 222.752, | |
| "eval_steps_per_second": 0.882, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.7876589929570065e-05, | |
| "loss": 0.0296, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.785031010196574e-05, | |
| "loss": 0.0165, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.78240302743614e-05, | |
| "loss": 0.0221, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.7797750446757074e-05, | |
| "loss": 0.0557, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.777147061915274e-05, | |
| "loss": 0.063, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.774519079154841e-05, | |
| "loss": 0.0425, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.771891096394408e-05, | |
| "loss": 0.0653, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.769263113633975e-05, | |
| "loss": 0.0647, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.766635130873542e-05, | |
| "loss": 0.0344, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.764007148113109e-05, | |
| "loss": 0.0473, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.7613791653526756e-05, | |
| "loss": 0.0264, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.758751182592243e-05, | |
| "loss": 0.0387, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.756123199831809e-05, | |
| "loss": 0.0176, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.7534952170713765e-05, | |
| "loss": 0.0394, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.7508672343109436e-05, | |
| "loss": 0.062, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.74823925155051e-05, | |
| "loss": 0.025, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.745611268790077e-05, | |
| "loss": 0.0155, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.742983286029644e-05, | |
| "loss": 0.0777, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.74035530326921e-05, | |
| "loss": 0.0351, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.7377273205087775e-05, | |
| "loss": 0.0303, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_f1": 0.9755338337989246, | |
| "eval_f1_0": 0.991693038431387, | |
| "eval_f1_1": 0.9877976376772832, | |
| "eval_f1_2": 0.9242240359623443, | |
| "eval_f1_3": 0.9984206231246839, | |
| "eval_loss": 0.04797644168138504, | |
| "eval_runtime": 53.2786, | |
| "eval_samples_per_second": 222.754, | |
| "eval_steps_per_second": 0.882, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.735099337748345e-05, | |
| "loss": 0.0407, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.732471354987911e-05, | |
| "loss": 0.0417, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.7298433722274784e-05, | |
| "loss": 0.0374, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.727215389467045e-05, | |
| "loss": 0.023, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.724587406706612e-05, | |
| "loss": 0.0271, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.721959423946179e-05, | |
| "loss": 0.0305, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.719331441185746e-05, | |
| "loss": 0.0368, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.716703458425313e-05, | |
| "loss": 0.0251, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.7140754756648794e-05, | |
| "loss": 0.077, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.7114474929044466e-05, | |
| "loss": 0.0095, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.708819510144014e-05, | |
| "loss": 0.0311, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.70619152738358e-05, | |
| "loss": 0.053, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.7035635446231475e-05, | |
| "loss": 0.0396, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.7009355618627146e-05, | |
| "loss": 0.0376, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.698307579102281e-05, | |
| "loss": 0.0305, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.695679596341848e-05, | |
| "loss": 0.0246, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.693051613581415e-05, | |
| "loss": 0.0169, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.690423630820982e-05, | |
| "loss": 0.037, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.687795648060549e-05, | |
| "loss": 0.0289, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.685167665300116e-05, | |
| "loss": 0.0389, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_f1": 0.9832259546184603, | |
| "eval_f1_0": 0.9946489155319365, | |
| "eval_f1_1": 0.9966030472889557, | |
| "eval_f1_2": 0.942989870485977, | |
| "eval_f1_3": 0.9986619851669725, | |
| "eval_loss": 0.027967283502221107, | |
| "eval_runtime": 53.2558, | |
| "eval_samples_per_second": 222.849, | |
| "eval_steps_per_second": 0.883, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.682539682539683e-05, | |
| "loss": 0.0477, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.67991169977925e-05, | |
| "loss": 0.029, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.6772837170188165e-05, | |
| "loss": 0.0405, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.674655734258384e-05, | |
| "loss": 0.0305, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.67202775149795e-05, | |
| "loss": 0.0323, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.6693997687375174e-05, | |
| "loss": 0.0155, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.6667717859770846e-05, | |
| "loss": 0.0205, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.664143803216651e-05, | |
| "loss": 0.0333, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.661515820456218e-05, | |
| "loss": 0.0231, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.658887837695785e-05, | |
| "loss": 0.0875, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.656259854935352e-05, | |
| "loss": 0.0398, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.653631872174919e-05, | |
| "loss": 0.0253, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.6510038894144856e-05, | |
| "loss": 0.039, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.648375906654053e-05, | |
| "loss": 0.0173, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.64574792389362e-05, | |
| "loss": 0.0143, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.6431199411331865e-05, | |
| "loss": 0.0337, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.640491958372754e-05, | |
| "loss": 0.0312, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.63786397561232e-05, | |
| "loss": 0.0258, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.6352359928518873e-05, | |
| "loss": 0.0154, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.6326080100914545e-05, | |
| "loss": 0.0408, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_f1": 0.9872307516679847, | |
| "eval_f1_0": 0.9959209993448118, | |
| "eval_f1_1": 0.9971936532504775, | |
| "eval_f1_2": 0.9570808398310161, | |
| "eval_f1_3": 0.9987275142456332, | |
| "eval_loss": 0.0252233799546957, | |
| "eval_runtime": 53.2552, | |
| "eval_samples_per_second": 222.851, | |
| "eval_steps_per_second": 0.883, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.6299800273310204e-05, | |
| "loss": 0.0187, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.6273520445705875e-05, | |
| "loss": 0.0048, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.624724061810155e-05, | |
| "loss": 0.0156, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.622096079049721e-05, | |
| "loss": 0.008, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.6194680962892884e-05, | |
| "loss": 0.0582, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.6168401135288556e-05, | |
| "loss": 0.0024, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.614212130768422e-05, | |
| "loss": 0.0291, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.611584148007989e-05, | |
| "loss": 0.0394, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.608956165247556e-05, | |
| "loss": 0.0184, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.606328182487123e-05, | |
| "loss": 0.0401, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.60370019972669e-05, | |
| "loss": 0.0456, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.6010722169662566e-05, | |
| "loss": 0.0229, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.598444234205824e-05, | |
| "loss": 0.0277, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.59581625144539e-05, | |
| "loss": 0.025, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.5931882686849575e-05, | |
| "loss": 0.0149, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.5905602859245247e-05, | |
| "loss": 0.017, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.587932303164091e-05, | |
| "loss": 0.0231, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.585304320403658e-05, | |
| "loss": 0.0129, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.5826763376432255e-05, | |
| "loss": 0.0147, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.580048354882792e-05, | |
| "loss": 0.0196, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_f1": 0.9887726640579566, | |
| "eval_f1_0": 0.9963991401714185, | |
| "eval_f1_1": 0.9967365610435348, | |
| "eval_f1_2": 0.9631465838244995, | |
| "eval_f1_3": 0.9988083711923735, | |
| "eval_loss": 0.02168872021138668, | |
| "eval_runtime": 53.355, | |
| "eval_samples_per_second": 222.435, | |
| "eval_steps_per_second": 0.881, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.577420372122359e-05, | |
| "loss": 0.0072, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.574792389361926e-05, | |
| "loss": 0.03, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.572164406601493e-05, | |
| "loss": 0.0481, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.56953642384106e-05, | |
| "loss": 0.08, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.5669084410806266e-05, | |
| "loss": 0.0629, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.564280458320194e-05, | |
| "loss": 0.038, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.561652475559761e-05, | |
| "loss": 0.0187, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.5590244927993274e-05, | |
| "loss": 0.0428, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.5563965100388946e-05, | |
| "loss": 0.0157, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.553768527278461e-05, | |
| "loss": 0.0149, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.551140544518028e-05, | |
| "loss": 0.0224, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.5485125617575955e-05, | |
| "loss": 0.0391, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.545884578997162e-05, | |
| "loss": 0.0365, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.543256596236729e-05, | |
| "loss": 0.0371, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.5406286134762956e-05, | |
| "loss": 0.0165, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.538000630715863e-05, | |
| "loss": 0.0354, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.53537264795543e-05, | |
| "loss": 0.0225, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.5327446651949965e-05, | |
| "loss": 0.0552, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.530116682434564e-05, | |
| "loss": 0.0293, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.527488699674131e-05, | |
| "loss": 0.039, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_f1": 0.986085281999668, | |
| "eval_f1_0": 0.9957410000522015, | |
| "eval_f1_1": 0.9973185411943425, | |
| "eval_f1_2": 0.9525071187726427, | |
| "eval_f1_3": 0.9987744679794855, | |
| "eval_loss": 0.025714803487062454, | |
| "eval_runtime": 53.2058, | |
| "eval_samples_per_second": 223.058, | |
| "eval_steps_per_second": 0.883, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.5248607169136974e-05, | |
| "loss": 0.0285, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.5222327341532645e-05, | |
| "loss": 0.0372, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.519604751392831e-05, | |
| "loss": 0.0675, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.5169767686323976e-05, | |
| "loss": 0.0139, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.514348785871965e-05, | |
| "loss": 0.0087, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.511720803111531e-05, | |
| "loss": 0.0161, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.5090928203510984e-05, | |
| "loss": 0.031, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.5064648375906656e-05, | |
| "loss": 0.0776, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.503836854830232e-05, | |
| "loss": 0.0099, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.501208872069799e-05, | |
| "loss": 0.0671, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.4985808893093665e-05, | |
| "loss": 0.0351, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.495952906548933e-05, | |
| "loss": 0.0331, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.4933249237885e-05, | |
| "loss": 0.0685, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.4906969410280666e-05, | |
| "loss": 0.0241, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.488068958267634e-05, | |
| "loss": 0.009, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.485440975507201e-05, | |
| "loss": 0.0556, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.4828129927467675e-05, | |
| "loss": 0.0149, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.480185009986335e-05, | |
| "loss": 0.0253, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.477557027225902e-05, | |
| "loss": 0.0231, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.4749290444654684e-05, | |
| "loss": 0.0077, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_f1": 0.9861304273680486, | |
| "eval_f1_0": 0.9957041417243657, | |
| "eval_f1_1": 0.995681458910857, | |
| "eval_f1_2": 0.954885379973998, | |
| "eval_f1_3": 0.9982507288629737, | |
| "eval_loss": 0.030237887054681778, | |
| "eval_runtime": 53.3094, | |
| "eval_samples_per_second": 222.625, | |
| "eval_steps_per_second": 0.882, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.4723010617050355e-05, | |
| "loss": 0.0344, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.469673078944602e-05, | |
| "loss": 0.0149, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.467045096184169e-05, | |
| "loss": 0.0027, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.4644171134237364e-05, | |
| "loss": 0.0464, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.461789130663303e-05, | |
| "loss": 0.0428, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.45916114790287e-05, | |
| "loss": 0.0351, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.4565331651424366e-05, | |
| "loss": 0.0128, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.453905182382004e-05, | |
| "loss": 0.0105, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.451277199621571e-05, | |
| "loss": 0.0478, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.4486492168611374e-05, | |
| "loss": 0.0513, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.4460212341007046e-05, | |
| "loss": 0.0271, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.443393251340272e-05, | |
| "loss": 0.0166, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.440765268579838e-05, | |
| "loss": 0.015, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.4381372858194055e-05, | |
| "loss": 0.007, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.435509303058972e-05, | |
| "loss": 0.0584, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.432881320298539e-05, | |
| "loss": 0.0172, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.4302533375381063e-05, | |
| "loss": 0.0437, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.427625354777673e-05, | |
| "loss": 0.0336, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.42499737201724e-05, | |
| "loss": 0.0203, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.422369389256807e-05, | |
| "loss": 0.027, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_f1": 0.9852472343292917, | |
| "eval_f1_0": 0.9952913732973311, | |
| "eval_f1_1": 0.997427579949319, | |
| "eval_f1_2": 0.9496299706569509, | |
| "eval_f1_3": 0.9986400134135663, | |
| "eval_loss": 0.024080442264676094, | |
| "eval_runtime": 53.231, | |
| "eval_samples_per_second": 222.953, | |
| "eval_steps_per_second": 0.883, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.419741406496374e-05, | |
| "loss": 0.0407, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.417113423735941e-05, | |
| "loss": 0.014, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.4144854409755074e-05, | |
| "loss": 0.0247, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.4118574582150746e-05, | |
| "loss": 0.0303, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.409229475454642e-05, | |
| "loss": 0.0331, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.406601492694208e-05, | |
| "loss": 0.0375, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.403973509933775e-05, | |
| "loss": 0.0261, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.401345527173342e-05, | |
| "loss": 0.0215, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.3987175444129084e-05, | |
| "loss": 0.0314, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.3960895616524756e-05, | |
| "loss": 0.0146, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.393461578892043e-05, | |
| "loss": 0.0409, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.390833596131609e-05, | |
| "loss": 0.0082, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.3882056133711765e-05, | |
| "loss": 0.0079, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.385577630610743e-05, | |
| "loss": 0.0265, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.38294964785031e-05, | |
| "loss": 0.0169, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.380321665089877e-05, | |
| "loss": 0.0257, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.377693682329444e-05, | |
| "loss": 0.0228, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.375065699569011e-05, | |
| "loss": 0.0229, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.3724377168085775e-05, | |
| "loss": 0.0391, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.369809734048145e-05, | |
| "loss": 0.044, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_f1": 0.9704234487573644, | |
| "eval_f1_0": 0.9898767933598651, | |
| "eval_f1_1": 0.9964992059361791, | |
| "eval_f1_2": 0.8965189093239978, | |
| "eval_f1_3": 0.9987988864094153, | |
| "eval_loss": 0.04618235304951668, | |
| "eval_runtime": 53.2373, | |
| "eval_samples_per_second": 222.926, | |
| "eval_steps_per_second": 0.883, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.367181751287712e-05, | |
| "loss": 0.0112, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.3645537685272784e-05, | |
| "loss": 0.0091, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.3619257857668456e-05, | |
| "loss": 0.0111, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.359297803006413e-05, | |
| "loss": 0.0198, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.356669820245979e-05, | |
| "loss": 0.0113, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 4.3540418374855464e-05, | |
| "loss": 0.008, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 4.351413854725113e-05, | |
| "loss": 0.0132, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 4.34878587196468e-05, | |
| "loss": 0.0092, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.346157889204247e-05, | |
| "loss": 0.0577, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.343529906443814e-05, | |
| "loss": 0.0424, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.340901923683381e-05, | |
| "loss": 0.0248, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.338273940922948e-05, | |
| "loss": 0.0133, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 4.3356459581625146e-05, | |
| "loss": 0.0364, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 4.333017975402082e-05, | |
| "loss": 0.0215, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 4.330389992641648e-05, | |
| "loss": 0.0125, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.3277620098812155e-05, | |
| "loss": 0.0247, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.325134027120783e-05, | |
| "loss": 0.0077, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.322506044360349e-05, | |
| "loss": 0.0237, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.3198780615999164e-05, | |
| "loss": 0.0126, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.317250078839483e-05, | |
| "loss": 0.0208, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "eval_f1": 0.9885796705027498, | |
| "eval_f1_0": 0.996397910953713, | |
| "eval_f1_1": 0.9976364871138669, | |
| "eval_f1_2": 0.9614016994854276, | |
| "eval_f1_3": 0.9988825844579914, | |
| "eval_loss": 0.019902125000953674, | |
| "eval_runtime": 53.2714, | |
| "eval_samples_per_second": 222.784, | |
| "eval_steps_per_second": 0.882, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.31462209607905e-05, | |
| "loss": 0.0108, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.311994113318617e-05, | |
| "loss": 0.0102, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.309366130558184e-05, | |
| "loss": 0.0036, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.306738147797751e-05, | |
| "loss": 0.0164, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.304110165037318e-05, | |
| "loss": 0.0393, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 4.3014821822768846e-05, | |
| "loss": 0.0416, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 4.298854199516452e-05, | |
| "loss": 0.0322, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 4.296226216756018e-05, | |
| "loss": 0.0081, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 4.293598233995585e-05, | |
| "loss": 0.0171, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 4.290970251235152e-05, | |
| "loss": 0.0094, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 4.2883422684747184e-05, | |
| "loss": 0.0077, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 4.2857142857142856e-05, | |
| "loss": 0.0139, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.283086302953853e-05, | |
| "loss": 0.0382, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.280458320193419e-05, | |
| "loss": 0.025, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.2778303374329865e-05, | |
| "loss": 0.0164, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 4.275202354672554e-05, | |
| "loss": 0.0062, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 4.27257437191212e-05, | |
| "loss": 0.0452, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 4.2699463891516874e-05, | |
| "loss": 0.0113, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.267318406391254e-05, | |
| "loss": 0.0191, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.264690423630821e-05, | |
| "loss": 0.0258, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_f1": 0.9845698140822621, | |
| "eval_f1_0": 0.9953415768896084, | |
| "eval_f1_1": 0.9977729309531932, | |
| "eval_f1_2": 0.9464030597896395, | |
| "eval_f1_3": 0.9987616886966075, | |
| "eval_loss": 0.030071619898080826, | |
| "eval_runtime": 53.3053, | |
| "eval_samples_per_second": 222.642, | |
| "eval_steps_per_second": 0.882, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.262062440870388e-05, | |
| "loss": 0.0106, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.259434458109955e-05, | |
| "loss": 0.0339, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.256806475349522e-05, | |
| "loss": 0.0177, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.254178492589089e-05, | |
| "loss": 0.0256, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.2515505098286556e-05, | |
| "loss": 0.013, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.248922527068223e-05, | |
| "loss": 0.006, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.246294544307789e-05, | |
| "loss": 0.0279, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.2436665615473564e-05, | |
| "loss": 0.032, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.2410385787869236e-05, | |
| "loss": 0.0058, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.23841059602649e-05, | |
| "loss": 0.0086, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.235782613266057e-05, | |
| "loss": 0.0236, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.233154630505624e-05, | |
| "loss": 0.0085, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.230526647745191e-05, | |
| "loss": 0.0091, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.227898664984758e-05, | |
| "loss": 0.0068, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.2252706822243247e-05, | |
| "loss": 0.0123, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 4.222642699463892e-05, | |
| "loss": 0.0034, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 4.220014716703459e-05, | |
| "loss": 0.0324, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 4.2173867339430255e-05, | |
| "loss": 0.0166, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 4.214758751182593e-05, | |
| "loss": 0.02, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 4.212130768422159e-05, | |
| "loss": 0.0243, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_f1": 0.9758889262675604, | |
| "eval_f1_0": 0.9918425590078939, | |
| "eval_f1_1": 0.9975695123306997, | |
| "eval_f1_2": 0.915422437863551, | |
| "eval_f1_3": 0.9987211958680969, | |
| "eval_loss": 0.05459127202630043, | |
| "eval_runtime": 53.3211, | |
| "eval_samples_per_second": 222.576, | |
| "eval_steps_per_second": 0.881, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 4.2095027856617264e-05, | |
| "loss": 0.039, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 4.2068748029012936e-05, | |
| "loss": 0.0142, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 4.20424682014086e-05, | |
| "loss": 0.0372, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 4.201618837380427e-05, | |
| "loss": 0.028, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 4.1989908546199944e-05, | |
| "loss": 0.0376, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 4.196362871859561e-05, | |
| "loss": 0.0241, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 4.193734889099128e-05, | |
| "loss": 0.0144, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 4.1911069063386946e-05, | |
| "loss": 0.0169, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 4.188478923578262e-05, | |
| "loss": 0.0454, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 4.185850940817829e-05, | |
| "loss": 0.0381, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 4.1832229580573955e-05, | |
| "loss": 0.0355, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 4.180594975296962e-05, | |
| "loss": 0.0217, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 4.177966992536529e-05, | |
| "loss": 0.0134, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 4.1753390097760956e-05, | |
| "loss": 0.0426, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 4.172711027015663e-05, | |
| "loss": 0.0567, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 4.170083044255229e-05, | |
| "loss": 0.0236, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 4.1674550614947965e-05, | |
| "loss": 0.0253, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 4.164827078734364e-05, | |
| "loss": 0.025, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.16219909597393e-05, | |
| "loss": 0.0069, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.1595711132134974e-05, | |
| "loss": 0.0315, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "eval_f1": 0.9905806489954309, | |
| "eval_f1_0": 0.997055908346431, | |
| "eval_f1_1": 0.997946052719898, | |
| "eval_f1_2": 0.9692890107108816, | |
| "eval_f1_3": 0.9980316242045127, | |
| "eval_loss": 0.019053112715482712, | |
| "eval_runtime": 53.2686, | |
| "eval_samples_per_second": 222.796, | |
| "eval_steps_per_second": 0.882, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.1569431304530645e-05, | |
| "loss": 0.0309, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.154315147692631e-05, | |
| "loss": 0.0152, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.151687164932198e-05, | |
| "loss": 0.0236, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.149059182171765e-05, | |
| "loss": 0.0144, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.146431199411332e-05, | |
| "loss": 0.0243, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.143803216650899e-05, | |
| "loss": 0.0146, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.1411752338904656e-05, | |
| "loss": 0.0079, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.138547251130033e-05, | |
| "loss": 0.0249, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.1359192683696e-05, | |
| "loss": 0.0031, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.1332912856091665e-05, | |
| "loss": 0.0055, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.1306633028487336e-05, | |
| "loss": 0.0027, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.1280353200883e-05, | |
| "loss": 0.0343, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.125407337327867e-05, | |
| "loss": 0.0534, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.1227793545674345e-05, | |
| "loss": 0.015, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.120151371807001e-05, | |
| "loss": 0.0439, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.117523389046568e-05, | |
| "loss": 0.0127, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.114895406286135e-05, | |
| "loss": 0.0342, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.112267423525702e-05, | |
| "loss": 0.037, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 4.109639440765269e-05, | |
| "loss": 0.0419, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 4.1070114580048355e-05, | |
| "loss": 0.0107, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "eval_f1": 0.9901851558278552, | |
| "eval_f1_0": 0.9968996202993475, | |
| "eval_f1_1": 0.9974876010233711, | |
| "eval_f1_2": 0.967474022334325, | |
| "eval_f1_3": 0.9988793796543771, | |
| "eval_loss": 0.016864441335201263, | |
| "eval_runtime": 53.2892, | |
| "eval_samples_per_second": 222.709, | |
| "eval_steps_per_second": 0.882, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 4.104383475244403e-05, | |
| "loss": 0.0278, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 4.10175549248397e-05, | |
| "loss": 0.0091, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.0991275097235364e-05, | |
| "loss": 0.0264, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.0964995269631036e-05, | |
| "loss": 0.0202, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.09387154420267e-05, | |
| "loss": 0.0267, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.091243561442237e-05, | |
| "loss": 0.0211, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.0886155786818044e-05, | |
| "loss": 0.0127, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.085987595921371e-05, | |
| "loss": 0.0134, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.083359613160938e-05, | |
| "loss": 0.0136, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.080731630400505e-05, | |
| "loss": 0.0079, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.078103647640072e-05, | |
| "loss": 0.0278, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.075475664879639e-05, | |
| "loss": 0.0109, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.0728476821192055e-05, | |
| "loss": 0.003, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.070219699358773e-05, | |
| "loss": 0.0403, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.067591716598339e-05, | |
| "loss": 0.0177, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 4.064963733837906e-05, | |
| "loss": 0.0049, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 4.062335751077473e-05, | |
| "loss": 0.0047, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 4.05970776831704e-05, | |
| "loss": 0.0455, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.0570797855566065e-05, | |
| "loss": 0.0087, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.054451802796174e-05, | |
| "loss": 0.0077, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "eval_f1": 0.9810789471197044, | |
| "eval_f1_0": 0.9943578765323313, | |
| "eval_f1_1": 0.9973014848536501, | |
| "eval_f1_2": 0.9337456942425872, | |
| "eval_f1_3": 0.998910732850249, | |
| "eval_loss": 0.029722465202212334, | |
| "eval_runtime": 53.2615, | |
| "eval_samples_per_second": 222.825, | |
| "eval_steps_per_second": 0.882, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.051823820035741e-05, | |
| "loss": 0.0127, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.0491958372753074e-05, | |
| "loss": 0.0151, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 4.0465678545148746e-05, | |
| "loss": 0.004, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 4.043939871754441e-05, | |
| "loss": 0.0059, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 4.041311888994008e-05, | |
| "loss": 0.004, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 4.0386839062335754e-05, | |
| "loss": 0.0211, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 4.036055923473142e-05, | |
| "loss": 0.025, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 4.033427940712709e-05, | |
| "loss": 0.0145, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 4.0307999579522756e-05, | |
| "loss": 0.0226, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 4.028171975191843e-05, | |
| "loss": 0.0197, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 4.02554399243141e-05, | |
| "loss": 0.0217, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 4.0229160096709765e-05, | |
| "loss": 0.019, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 4.0202880269105437e-05, | |
| "loss": 0.049, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 4.017660044150111e-05, | |
| "loss": 0.0171, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 4.015032061389677e-05, | |
| "loss": 0.0113, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 4.0124040786292445e-05, | |
| "loss": 0.0374, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 4.009776095868811e-05, | |
| "loss": 0.0166, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 4.007148113108378e-05, | |
| "loss": 0.0092, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 4.0045201303479454e-05, | |
| "loss": 0.0268, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 4.001892147587512e-05, | |
| "loss": 0.0303, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "eval_f1": 0.9882341464888815, | |
| "eval_f1_0": 0.9962570940403399, | |
| "eval_f1_1": 0.9961376820738735, | |
| "eval_f1_2": 0.9616341565869263, | |
| "eval_f1_3": 0.9989076532543865, | |
| "eval_loss": 0.022203443571925163, | |
| "eval_runtime": 53.3025, | |
| "eval_samples_per_second": 222.654, | |
| "eval_steps_per_second": 0.882, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.999264164827079e-05, | |
| "loss": 0.0113, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.996636182066646e-05, | |
| "loss": 0.0202, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.994008199306213e-05, | |
| "loss": 0.0292, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.99138021654578e-05, | |
| "loss": 0.0194, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.9887522337853464e-05, | |
| "loss": 0.0077, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 3.9861242510249136e-05, | |
| "loss": 0.0038, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 3.983496268264481e-05, | |
| "loss": 0.0252, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 3.980868285504047e-05, | |
| "loss": 0.0037, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 3.9782403027436145e-05, | |
| "loss": 0.0271, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.975612319983181e-05, | |
| "loss": 0.0082, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.972984337222748e-05, | |
| "loss": 0.0241, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.970356354462315e-05, | |
| "loss": 0.024, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 3.967728371701882e-05, | |
| "loss": 0.037, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 3.965100388941449e-05, | |
| "loss": 0.009, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 3.962472406181016e-05, | |
| "loss": 0.0031, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 3.959844423420583e-05, | |
| "loss": 0.0348, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 3.957216440660149e-05, | |
| "loss": 0.0059, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 3.9545884578997164e-05, | |
| "loss": 0.0187, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 3.951960475139283e-05, | |
| "loss": 0.028, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 3.94933249237885e-05, | |
| "loss": 0.0062, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_f1": 0.988427145836447, | |
| "eval_f1_0": 0.9964087925447377, | |
| "eval_f1_1": 0.9971002048830786, | |
| "eval_f1_2": 0.9613856561134559, | |
| "eval_f1_3": 0.9988139298045158, | |
| "eval_loss": 0.019996481016278267, | |
| "eval_runtime": 53.2705, | |
| "eval_samples_per_second": 222.787, | |
| "eval_steps_per_second": 0.882, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 3.9467045096184165e-05, | |
| "loss": 0.0114, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 3.944076526857984e-05, | |
| "loss": 0.0051, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 3.941448544097551e-05, | |
| "loss": 0.0106, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 3.9388205613371174e-05, | |
| "loss": 0.0079, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 3.9361925785766846e-05, | |
| "loss": 0.0106, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 3.933564595816252e-05, | |
| "loss": 0.0048, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 3.930936613055818e-05, | |
| "loss": 0.0113, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 3.9283086302953854e-05, | |
| "loss": 0.0108, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 3.925680647534952e-05, | |
| "loss": 0.0289, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 3.923052664774519e-05, | |
| "loss": 0.0267, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 3.920424682014086e-05, | |
| "loss": 0.0063, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 3.917796699253653e-05, | |
| "loss": 0.0094, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 3.91516871649322e-05, | |
| "loss": 0.0061, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 3.912540733732787e-05, | |
| "loss": 0.0369, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 3.909912750972354e-05, | |
| "loss": 0.0138, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 3.907284768211921e-05, | |
| "loss": 0.0172, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 3.9046567854514874e-05, | |
| "loss": 0.0069, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 3.9020288026910545e-05, | |
| "loss": 0.0148, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 3.899400819930622e-05, | |
| "loss": 0.006, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 3.896772837170188e-05, | |
| "loss": 0.0244, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "eval_f1": 0.9863014050948063, | |
| "eval_f1_0": 0.9958320662985854, | |
| "eval_f1_1": 0.9981034555423159, | |
| "eval_f1_2": 0.9524213321580693, | |
| "eval_f1_3": 0.9988487663802547, | |
| "eval_loss": 0.02705535478889942, | |
| "eval_runtime": 53.231, | |
| "eval_samples_per_second": 222.953, | |
| "eval_steps_per_second": 0.883, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 3.8941448544097554e-05, | |
| "loss": 0.0424, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 3.891516871649322e-05, | |
| "loss": 0.0091, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 3.888888888888889e-05, | |
| "loss": 0.01, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 3.886260906128456e-05, | |
| "loss": 0.0246, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 3.883632923368023e-05, | |
| "loss": 0.0163, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 3.88100494060759e-05, | |
| "loss": 0.007, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 3.878376957847157e-05, | |
| "loss": 0.0041, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 3.8757489750867236e-05, | |
| "loss": 0.0206, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 3.873120992326291e-05, | |
| "loss": 0.0287, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 3.870493009565857e-05, | |
| "loss": 0.0163, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 3.8678650268054245e-05, | |
| "loss": 0.0217, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 3.8652370440449917e-05, | |
| "loss": 0.0282, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 3.862609061284558e-05, | |
| "loss": 0.0044, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 3.8599810785241253e-05, | |
| "loss": 0.016, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 3.8573530957636925e-05, | |
| "loss": 0.005, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 3.854725113003259e-05, | |
| "loss": 0.0182, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 3.852097130242826e-05, | |
| "loss": 0.0237, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 3.849469147482393e-05, | |
| "loss": 0.0266, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 3.84684116472196e-05, | |
| "loss": 0.0332, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 3.8442131819615264e-05, | |
| "loss": 0.0123, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "eval_f1": 0.9903904030593307, | |
| "eval_f1_0": 0.9969964432107575, | |
| "eval_f1_1": 0.9975871590904739, | |
| "eval_f1_2": 0.9680795194695785, | |
| "eval_f1_3": 0.9988984904665126, | |
| "eval_loss": 0.016976816579699516, | |
| "eval_runtime": 53.3087, | |
| "eval_samples_per_second": 222.628, | |
| "eval_steps_per_second": 0.882, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 3.841585199201093e-05, | |
| "loss": 0.0089, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 3.83895721644066e-05, | |
| "loss": 0.0202, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 3.836329233680227e-05, | |
| "loss": 0.0339, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 3.833701250919794e-05, | |
| "loss": 0.0285, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 3.831073268159361e-05, | |
| "loss": 0.0101, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 3.828445285398928e-05, | |
| "loss": 0.0385, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 3.8258173026384946e-05, | |
| "loss": 0.0088, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 3.823189319878062e-05, | |
| "loss": 0.0192, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 3.820561337117628e-05, | |
| "loss": 0.0084, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 3.8179333543571955e-05, | |
| "loss": 0.0431, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 3.8153053715967626e-05, | |
| "loss": 0.0348, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 3.812677388836329e-05, | |
| "loss": 0.019, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 3.810049406075896e-05, | |
| "loss": 0.054, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 3.807421423315463e-05, | |
| "loss": 0.0313, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 3.80479344055503e-05, | |
| "loss": 0.0209, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 3.802165457794597e-05, | |
| "loss": 0.0187, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 3.799537475034164e-05, | |
| "loss": 0.0177, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 3.796909492273731e-05, | |
| "loss": 0.0175, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 3.794281509513298e-05, | |
| "loss": 0.0075, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 3.7916535267528646e-05, | |
| "loss": 0.0428, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "eval_f1": 0.9895366266198364, | |
| "eval_f1_0": 0.9966985214279319, | |
| "eval_f1_1": 0.9971627586656717, | |
| "eval_f1_2": 0.9655738501665956, | |
| "eval_f1_3": 0.9987113762191467, | |
| "eval_loss": 0.017527282238006592, | |
| "eval_runtime": 53.24, | |
| "eval_samples_per_second": 222.915, | |
| "eval_steps_per_second": 0.883, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 3.789025543992432e-05, | |
| "loss": 0.0396, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 3.786397561231998e-05, | |
| "loss": 0.0139, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 3.7837695784715654e-05, | |
| "loss": 0.0222, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 3.7811415957111326e-05, | |
| "loss": 0.0363, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 3.778513612950699e-05, | |
| "loss": 0.0023, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 3.775885630190266e-05, | |
| "loss": 0.053, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 3.7732576474298335e-05, | |
| "loss": 0.009, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 3.7706296646694e-05, | |
| "loss": 0.0065, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 3.768001681908967e-05, | |
| "loss": 0.032, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 3.7653736991485336e-05, | |
| "loss": 0.0351, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 3.762745716388101e-05, | |
| "loss": 0.0168, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 3.760117733627668e-05, | |
| "loss": 0.0149, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 3.7574897508672345e-05, | |
| "loss": 0.0114, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 3.754861768106802e-05, | |
| "loss": 0.0131, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 3.752233785346368e-05, | |
| "loss": 0.0143, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 3.7496058025859354e-05, | |
| "loss": 0.0167, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 3.7469778198255025e-05, | |
| "loss": 0.027, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 3.744349837065069e-05, | |
| "loss": 0.0247, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 3.741721854304636e-05, | |
| "loss": 0.012, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 3.7390938715442034e-05, | |
| "loss": 0.0144, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "eval_f1": 0.9902148365969656, | |
| "eval_f1_0": 0.9968994883788248, | |
| "eval_f1_1": 0.997476188470126, | |
| "eval_f1_2": 0.968282018778334, | |
| "eval_f1_3": 0.9982016507605777, | |
| "eval_loss": 0.016549725085496902, | |
| "eval_runtime": 53.2744, | |
| "eval_samples_per_second": 222.771, | |
| "eval_steps_per_second": 0.882, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 3.73646588878377e-05, | |
| "loss": 0.0109, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 3.733837906023337e-05, | |
| "loss": 0.0063, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 3.7312099232629036e-05, | |
| "loss": 0.0207, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 3.72858194050247e-05, | |
| "loss": 0.0088, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 3.725953957742037e-05, | |
| "loss": 0.0513, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 3.723325974981604e-05, | |
| "loss": 0.0525, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 3.720697992221171e-05, | |
| "loss": 0.0101, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 3.718070009460738e-05, | |
| "loss": 0.0406, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 3.7154420267003046e-05, | |
| "loss": 0.0115, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 3.712814043939872e-05, | |
| "loss": 0.0174, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 3.710186061179439e-05, | |
| "loss": 0.0199, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 3.7075580784190055e-05, | |
| "loss": 0.0089, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 3.704930095658573e-05, | |
| "loss": 0.0172, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 3.702302112898139e-05, | |
| "loss": 0.0235, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 3.6996741301377063e-05, | |
| "loss": 0.0181, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 3.6970461473772735e-05, | |
| "loss": 0.0254, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 3.69441816461684e-05, | |
| "loss": 0.0115, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 3.691790181856407e-05, | |
| "loss": 0.0048, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 3.689162199095974e-05, | |
| "loss": 0.0312, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 3.686534216335541e-05, | |
| "loss": 0.032, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_f1": 0.990454296038497, | |
| "eval_f1_0": 0.9969953781583025, | |
| "eval_f1_1": 0.9976073527889294, | |
| "eval_f1_2": 0.9686538913295297, | |
| "eval_f1_3": 0.9985605618772259, | |
| "eval_loss": 0.01473264116793871, | |
| "eval_runtime": 53.276, | |
| "eval_samples_per_second": 222.764, | |
| "eval_steps_per_second": 0.882, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 3.683906233575108e-05, | |
| "loss": 0.0064, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 3.6812782508146746e-05, | |
| "loss": 0.0431, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 3.678650268054242e-05, | |
| "loss": 0.0349, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 3.676022285293809e-05, | |
| "loss": 0.0249, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 3.6733943025333754e-05, | |
| "loss": 0.0143, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 3.6707663197729426e-05, | |
| "loss": 0.0292, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 3.668138337012509e-05, | |
| "loss": 0.0099, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 3.665510354252076e-05, | |
| "loss": 0.0101, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 3.6628823714916435e-05, | |
| "loss": 0.0081, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 3.66025438873121e-05, | |
| "loss": 0.0236, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 3.657626405970777e-05, | |
| "loss": 0.0386, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 3.654998423210344e-05, | |
| "loss": 0.0038, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 3.652370440449911e-05, | |
| "loss": 0.0098, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 3.649742457689478e-05, | |
| "loss": 0.011, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 3.6471144749290445e-05, | |
| "loss": 0.0199, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 3.644486492168612e-05, | |
| "loss": 0.0083, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 3.641858509408179e-05, | |
| "loss": 0.0516, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 3.6392305266477454e-05, | |
| "loss": 0.0181, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 3.6366025438873126e-05, | |
| "loss": 0.0135, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 3.633974561126879e-05, | |
| "loss": 0.0062, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_f1": 0.9828945374124921, | |
| "eval_f1_0": 0.9944139820295181, | |
| "eval_f1_1": 0.9976678621430353, | |
| "eval_f1_2": 0.9408426096553008, | |
| "eval_f1_3": 0.9986536958221142, | |
| "eval_loss": 0.030880145728588104, | |
| "eval_runtime": 53.2137, | |
| "eval_samples_per_second": 223.025, | |
| "eval_steps_per_second": 0.883, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 3.631346578366446e-05, | |
| "loss": 0.0288, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 3.6287185956060134e-05, | |
| "loss": 0.017, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 3.62609061284558e-05, | |
| "loss": 0.0222, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 3.623462630085147e-05, | |
| "loss": 0.0193, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 3.6208346473247136e-05, | |
| "loss": 0.007, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 3.61820666456428e-05, | |
| "loss": 0.014, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 3.615578681803847e-05, | |
| "loss": 0.0117, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 3.6129506990434145e-05, | |
| "loss": 0.0147, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 3.610322716282981e-05, | |
| "loss": 0.0028, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.607694733522548e-05, | |
| "loss": 0.0376, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.6050667507621146e-05, | |
| "loss": 0.0256, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.602438768001682e-05, | |
| "loss": 0.0355, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 3.599810785241249e-05, | |
| "loss": 0.0048, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 3.5971828024808155e-05, | |
| "loss": 0.0036, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 3.594554819720383e-05, | |
| "loss": 0.0072, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 3.59192683695995e-05, | |
| "loss": 0.002, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 3.5892988541995164e-05, | |
| "loss": 0.0043, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 3.5866708714390835e-05, | |
| "loss": 0.0231, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 3.58404288867865e-05, | |
| "loss": 0.0042, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.581414905918217e-05, | |
| "loss": 0.0131, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_f1": 0.9884566672672931, | |
| "eval_f1_0": 0.9963622833229593, | |
| "eval_f1_1": 0.9980315149284771, | |
| "eval_f1_2": 0.9604415328973178, | |
| "eval_f1_3": 0.9989913379204182, | |
| "eval_loss": 0.022728843614459038, | |
| "eval_runtime": 53.2888, | |
| "eval_samples_per_second": 222.711, | |
| "eval_steps_per_second": 0.882, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.5787869231577844e-05, | |
| "loss": 0.0559, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.576158940397351e-05, | |
| "loss": 0.0036, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 3.573530957636918e-05, | |
| "loss": 0.0274, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 3.570902974876485e-05, | |
| "loss": 0.0248, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 3.568274992116052e-05, | |
| "loss": 0.0219, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 3.565647009355619e-05, | |
| "loss": 0.013, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 3.5630190265951855e-05, | |
| "loss": 0.0144, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 3.5603910438347526e-05, | |
| "loss": 0.0284, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 3.55776306107432e-05, | |
| "loss": 0.0067, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 3.555135078313886e-05, | |
| "loss": 0.0123, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 3.5525070955534535e-05, | |
| "loss": 0.0276, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 3.54987911279302e-05, | |
| "loss": 0.0761, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 3.547251130032587e-05, | |
| "loss": 0.0257, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 3.5446231472721544e-05, | |
| "loss": 0.0064, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 3.541995164511721e-05, | |
| "loss": 0.0103, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 3.539367181751288e-05, | |
| "loss": 0.0039, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 3.536739198990855e-05, | |
| "loss": 0.0103, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 3.534111216230422e-05, | |
| "loss": 0.0119, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 3.531483233469989e-05, | |
| "loss": 0.0133, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 3.5288552507095554e-05, | |
| "loss": 0.0376, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "eval_f1": 0.993526599442064, | |
| "eval_f1_0": 0.9979671388907684, | |
| "eval_f1_1": 0.9974921281221278, | |
| "eval_f1_2": 0.9797800483751776, | |
| "eval_f1_3": 0.9988670823801823, | |
| "eval_loss": 0.013585735112428665, | |
| "eval_runtime": 53.2249, | |
| "eval_samples_per_second": 222.978, | |
| "eval_steps_per_second": 0.883, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 3.5262272679491226e-05, | |
| "loss": 0.0107, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 3.52359928518869e-05, | |
| "loss": 0.0116, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 3.520971302428256e-05, | |
| "loss": 0.0099, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 3.5183433196678234e-05, | |
| "loss": 0.0122, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 3.5157153369073906e-05, | |
| "loss": 0.0059, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 3.513087354146957e-05, | |
| "loss": 0.0271, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 3.510459371386524e-05, | |
| "loss": 0.004, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 3.507831388626091e-05, | |
| "loss": 0.0079, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 3.505203405865657e-05, | |
| "loss": 0.0117, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 3.5025754231052245e-05, | |
| "loss": 0.007, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 3.499947440344791e-05, | |
| "loss": 0.0051, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 3.497319457584358e-05, | |
| "loss": 0.0025, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 3.4946914748239253e-05, | |
| "loss": 0.0085, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 3.492063492063492e-05, | |
| "loss": 0.0136, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 3.489435509303059e-05, | |
| "loss": 0.0093, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 3.486807526542626e-05, | |
| "loss": 0.0068, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 3.484179543782193e-05, | |
| "loss": 0.0195, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 3.48155156102176e-05, | |
| "loss": 0.0055, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 3.4789235782613264e-05, | |
| "loss": 0.0026, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 3.4762955955008936e-05, | |
| "loss": 0.0057, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_f1": 0.9890607674618193, | |
| "eval_f1_0": 0.9964958601655933, | |
| "eval_f1_1": 0.998043157856405, | |
| "eval_f1_2": 0.9627095323146126, | |
| "eval_f1_3": 0.9989945195106662, | |
| "eval_loss": 0.022696251049637794, | |
| "eval_runtime": 53.3706, | |
| "eval_samples_per_second": 222.37, | |
| "eval_steps_per_second": 0.881, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 3.473667612740461e-05, | |
| "loss": 0.0086, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 3.471039629980027e-05, | |
| "loss": 0.0041, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 3.4684116472195944e-05, | |
| "loss": 0.0027, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 3.465783664459161e-05, | |
| "loss": 0.0026, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 3.463155681698728e-05, | |
| "loss": 0.0076, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 3.460527698938295e-05, | |
| "loss": 0.007, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 3.457899716177862e-05, | |
| "loss": 0.007, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 3.455271733417429e-05, | |
| "loss": 0.0106, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 3.452643750656996e-05, | |
| "loss": 0.0392, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 3.4500157678965626e-05, | |
| "loss": 0.0118, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 3.44738778513613e-05, | |
| "loss": 0.0076, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 3.444759802375696e-05, | |
| "loss": 0.0029, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 3.4421318196152635e-05, | |
| "loss": 0.0113, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 3.439503836854831e-05, | |
| "loss": 0.0094, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 3.436875854094397e-05, | |
| "loss": 0.0056, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 3.4342478713339644e-05, | |
| "loss": 0.0026, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 3.4316198885735316e-05, | |
| "loss": 0.0025, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 3.428991905813098e-05, | |
| "loss": 0.002, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 3.426363923052665e-05, | |
| "loss": 0.0022, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 3.423735940292232e-05, | |
| "loss": 0.034, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "eval_f1": 0.9919433062214094, | |
| "eval_f1_0": 0.9974867382429036, | |
| "eval_f1_1": 0.9978065802592223, | |
| "eval_f1_2": 0.973924993088714, | |
| "eval_f1_3": 0.9985549132947977, | |
| "eval_loss": 0.016368065029382706, | |
| "eval_runtime": 53.1976, | |
| "eval_samples_per_second": 223.093, | |
| "eval_steps_per_second": 0.883, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 3.421107957531799e-05, | |
| "loss": 0.0051, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 3.418479974771366e-05, | |
| "loss": 0.0312, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 3.4158519920109326e-05, | |
| "loss": 0.0208, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 3.4132240092505e-05, | |
| "loss": 0.0029, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 3.410596026490066e-05, | |
| "loss": 0.0051, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 3.4079680437296335e-05, | |
| "loss": 0.0193, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 3.4053400609692006e-05, | |
| "loss": 0.0017, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 3.402712078208767e-05, | |
| "loss": 0.0066, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 3.400084095448334e-05, | |
| "loss": 0.0093, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 3.3974561126879015e-05, | |
| "loss": 0.0198, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 3.394828129927467e-05, | |
| "loss": 0.0127, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 3.3922001471670345e-05, | |
| "loss": 0.0038, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 3.389572164406602e-05, | |
| "loss": 0.0057, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 3.386944181646168e-05, | |
| "loss": 0.0129, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 3.3843161988857354e-05, | |
| "loss": 0.0208, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 3.381688216125302e-05, | |
| "loss": 0.0145, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 3.379060233364869e-05, | |
| "loss": 0.0099, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 3.376432250604436e-05, | |
| "loss": 0.0067, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 3.373804267844003e-05, | |
| "loss": 0.0206, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 3.37117628508357e-05, | |
| "loss": 0.0048, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "eval_f1": 0.9929448755536868, | |
| "eval_f1_0": 0.9977800469610748, | |
| "eval_f1_1": 0.9977676687864864, | |
| "eval_f1_2": 0.9772373793014063, | |
| "eval_f1_3": 0.99899440716578, | |
| "eval_loss": 0.013158726505935192, | |
| "eval_runtime": 53.2712, | |
| "eval_samples_per_second": 222.785, | |
| "eval_steps_per_second": 0.882, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 3.368548302323137e-05, | |
| "loss": 0.0036, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 3.3659203195627036e-05, | |
| "loss": 0.0071, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 3.363292336802271e-05, | |
| "loss": 0.0334, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 3.360664354041837e-05, | |
| "loss": 0.0117, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 3.3580363712814044e-05, | |
| "loss": 0.0033, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 3.3554083885209716e-05, | |
| "loss": 0.0229, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 3.352780405760538e-05, | |
| "loss": 0.0112, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 3.350152423000105e-05, | |
| "loss": 0.0052, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 3.3475244402396725e-05, | |
| "loss": 0.0048, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 3.344896457479239e-05, | |
| "loss": 0.0059, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 3.342268474718806e-05, | |
| "loss": 0.0227, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 3.339640491958373e-05, | |
| "loss": 0.0036, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 3.33701250919794e-05, | |
| "loss": 0.0157, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 3.334384526437507e-05, | |
| "loss": 0.01, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 3.3317565436770735e-05, | |
| "loss": 0.0048, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 3.329128560916641e-05, | |
| "loss": 0.0025, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 3.326500578156207e-05, | |
| "loss": 0.0375, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 3.3238725953957744e-05, | |
| "loss": 0.0029, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 3.3212446126353416e-05, | |
| "loss": 0.0149, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 3.318616629874908e-05, | |
| "loss": 0.0039, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "eval_f1": 0.9916341644454706, | |
| "eval_f1_0": 0.9974010783434527, | |
| "eval_f1_1": 0.9975840038042353, | |
| "eval_f1_2": 0.9729829725969001, | |
| "eval_f1_3": 0.998568603037294, | |
| "eval_loss": 0.01573426090180874, | |
| "eval_runtime": 53.3004, | |
| "eval_samples_per_second": 222.662, | |
| "eval_steps_per_second": 0.882, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 3.315988647114475e-05, | |
| "loss": 0.0061, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 3.3133606643540424e-05, | |
| "loss": 0.0081, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 3.310732681593609e-05, | |
| "loss": 0.0066, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 3.308104698833176e-05, | |
| "loss": 0.0151, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 3.3054767160727426e-05, | |
| "loss": 0.0083, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 3.30284873331231e-05, | |
| "loss": 0.0202, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 3.300220750551877e-05, | |
| "loss": 0.0239, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 3.2975927677914435e-05, | |
| "loss": 0.0037, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 3.2949647850310107e-05, | |
| "loss": 0.0192, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 3.292336802270578e-05, | |
| "loss": 0.0313, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 3.289708819510144e-05, | |
| "loss": 0.0158, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 3.2870808367497115e-05, | |
| "loss": 0.009, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 3.284452853989278e-05, | |
| "loss": 0.0101, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 3.2818248712288445e-05, | |
| "loss": 0.0055, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 3.279196888468412e-05, | |
| "loss": 0.006, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 3.276568905707978e-05, | |
| "loss": 0.0142, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 3.2739409229475454e-05, | |
| "loss": 0.0434, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 3.2713129401871126e-05, | |
| "loss": 0.0362, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 3.268684957426679e-05, | |
| "loss": 0.011, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 3.266056974666246e-05, | |
| "loss": 0.007, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "eval_f1": 0.9892171960228088, | |
| "eval_f1_0": 0.9965462294958259, | |
| "eval_f1_1": 0.9981181971485513, | |
| "eval_f1_2": 0.9632160634438539, | |
| "eval_f1_3": 0.998988294003004, | |
| "eval_loss": 0.01945377327501774, | |
| "eval_runtime": 53.2216, | |
| "eval_samples_per_second": 222.992, | |
| "eval_steps_per_second": 0.883, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 3.2634289919058134e-05, | |
| "loss": 0.0128, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 3.26080100914538e-05, | |
| "loss": 0.0029, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 3.258173026384947e-05, | |
| "loss": 0.0025, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 3.2555450436245136e-05, | |
| "loss": 0.0301, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 3.252917060864081e-05, | |
| "loss": 0.0211, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 3.250289078103648e-05, | |
| "loss": 0.0018, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 3.2476610953432145e-05, | |
| "loss": 0.0047, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 3.2450331125827816e-05, | |
| "loss": 0.0255, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 3.242405129822348e-05, | |
| "loss": 0.0257, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 3.239777147061915e-05, | |
| "loss": 0.01, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 3.2371491643014825e-05, | |
| "loss": 0.004, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 3.234521181541049e-05, | |
| "loss": 0.0159, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 3.231893198780616e-05, | |
| "loss": 0.011, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 3.2292652160201834e-05, | |
| "loss": 0.0043, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 3.22663723325975e-05, | |
| "loss": 0.0137, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 3.224009250499317e-05, | |
| "loss": 0.0147, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 3.2213812677388835e-05, | |
| "loss": 0.0112, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 3.218753284978451e-05, | |
| "loss": 0.028, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 3.216125302218018e-05, | |
| "loss": 0.0033, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 3.2134973194575844e-05, | |
| "loss": 0.0038, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "eval_f1": 0.9918897161538254, | |
| "eval_f1_0": 0.9974320930508085, | |
| "eval_f1_1": 0.9974823323244807, | |
| "eval_f1_2": 0.973631544785807, | |
| "eval_f1_3": 0.9990128944542052, | |
| "eval_loss": 0.014562987722456455, | |
| "eval_runtime": 53.2607, | |
| "eval_samples_per_second": 222.829, | |
| "eval_steps_per_second": 0.882, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 3.2108693366971516e-05, | |
| "loss": 0.0061, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 3.208241353936719e-05, | |
| "loss": 0.0074, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 3.205613371176285e-05, | |
| "loss": 0.0038, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 3.2029853884158525e-05, | |
| "loss": 0.0479, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 3.200357405655419e-05, | |
| "loss": 0.004, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 3.197729422894986e-05, | |
| "loss": 0.0063, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 3.195101440134553e-05, | |
| "loss": 0.0077, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 3.19247345737412e-05, | |
| "loss": 0.0057, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 3.189845474613687e-05, | |
| "loss": 0.0138, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 3.1872174918532535e-05, | |
| "loss": 0.0082, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 3.184589509092821e-05, | |
| "loss": 0.0029, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 3.181961526332388e-05, | |
| "loss": 0.0063, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 3.1793335435719544e-05, | |
| "loss": 0.0097, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 3.1767055608115215e-05, | |
| "loss": 0.0068, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 3.174077578051089e-05, | |
| "loss": 0.0042, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 3.1714495952906545e-05, | |
| "loss": 0.0115, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 3.168821612530222e-05, | |
| "loss": 0.008, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 3.166193629769789e-05, | |
| "loss": 0.014, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 3.1635656470093554e-05, | |
| "loss": 0.0083, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 3.1609376642489226e-05, | |
| "loss": 0.0364, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_f1": 0.9886284900276143, | |
| "eval_f1_0": 0.9963441156646601, | |
| "eval_f1_1": 0.9983078229174053, | |
| "eval_f1_2": 0.9608833388103845, | |
| "eval_f1_3": 0.9989786827180078, | |
| "eval_loss": 0.019674289971590042, | |
| "eval_runtime": 53.2331, | |
| "eval_samples_per_second": 222.944, | |
| "eval_steps_per_second": 0.883, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 3.158309681488489e-05, | |
| "loss": 0.0109, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 3.155681698728056e-05, | |
| "loss": 0.0038, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 3.1530537159676234e-05, | |
| "loss": 0.0111, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 3.15042573320719e-05, | |
| "loss": 0.0054, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 3.147797750446757e-05, | |
| "loss": 0.0022, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 3.145169767686324e-05, | |
| "loss": 0.0193, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 3.142541784925891e-05, | |
| "loss": 0.0041, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 3.139913802165458e-05, | |
| "loss": 0.0039, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 3.1372858194050245e-05, | |
| "loss": 0.0022, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 3.134657836644592e-05, | |
| "loss": 0.0043, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 3.132029853884159e-05, | |
| "loss": 0.0013, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 3.1294018711237253e-05, | |
| "loss": 0.0074, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 3.1267738883632925e-05, | |
| "loss": 0.0187, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 3.124145905602859e-05, | |
| "loss": 0.0279, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 3.121517922842426e-05, | |
| "loss": 0.0279, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 3.1188899400819934e-05, | |
| "loss": 0.0239, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 3.11626195732156e-05, | |
| "loss": 0.0121, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 3.113633974561127e-05, | |
| "loss": 0.0039, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 3.111005991800694e-05, | |
| "loss": 0.0031, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 3.108378009040261e-05, | |
| "loss": 0.0024, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "eval_f1": 0.9887433509677206, | |
| "eval_f1_0": 0.9963817531063979, | |
| "eval_f1_1": 0.9976038783023411, | |
| "eval_f1_2": 0.9621265373211634, | |
| "eval_f1_3": 0.9988612351409804, | |
| "eval_loss": 0.021771090105175972, | |
| "eval_runtime": 53.2034, | |
| "eval_samples_per_second": 223.069, | |
| "eval_steps_per_second": 0.883, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 3.105750026279828e-05, | |
| "loss": 0.0024, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 3.1031220435193944e-05, | |
| "loss": 0.0189, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 3.1004940607589616e-05, | |
| "loss": 0.0027, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 3.097866077998529e-05, | |
| "loss": 0.0056, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 3.095238095238095e-05, | |
| "loss": 0.0026, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 3.0926101124776625e-05, | |
| "loss": 0.0143, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 3.0899821297172296e-05, | |
| "loss": 0.0223, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 3.087354146956796e-05, | |
| "loss": 0.0057, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 3.084726164196363e-05, | |
| "loss": 0.0109, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 3.08209818143593e-05, | |
| "loss": 0.0019, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 3.079470198675497e-05, | |
| "loss": 0.0026, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 3.076842215915064e-05, | |
| "loss": 0.0338, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 3.074214233154631e-05, | |
| "loss": 0.0014, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 3.071586250394198e-05, | |
| "loss": 0.0018, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 3.0689582676337644e-05, | |
| "loss": 0.0016, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 3.0663302848733316e-05, | |
| "loss": 0.001, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 3.063702302112899e-05, | |
| "loss": 0.0039, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 3.061074319352465e-05, | |
| "loss": 0.0014, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 3.058446336592032e-05, | |
| "loss": 0.0062, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 3.055818353831599e-05, | |
| "loss": 0.0134, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "eval_f1": 0.9928095103361599, | |
| "eval_f1_0": 0.9977114497558153, | |
| "eval_f1_1": 0.9976035367962546, | |
| "eval_f1_2": 0.9769068149909359, | |
| "eval_f1_3": 0.9990162398016342, | |
| "eval_loss": 0.017637787386775017, | |
| "eval_runtime": 53.3768, | |
| "eval_samples_per_second": 222.344, | |
| "eval_steps_per_second": 0.881, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 3.0531903710711654e-05, | |
| "loss": 0.0014, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 3.0505623883107326e-05, | |
| "loss": 0.0336, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.0479344055502994e-05, | |
| "loss": 0.0107, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.0453064227898663e-05, | |
| "loss": 0.016, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.0426784400294335e-05, | |
| "loss": 0.0037, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.0400504572690003e-05, | |
| "loss": 0.0154, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 3.037422474508567e-05, | |
| "loss": 0.0022, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 3.034794491748134e-05, | |
| "loss": 0.0081, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 3.032166508987701e-05, | |
| "loss": 0.0065, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 3.029538526227268e-05, | |
| "loss": 0.0083, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 3.026910543466835e-05, | |
| "loss": 0.0056, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 3.0242825607064017e-05, | |
| "loss": 0.0091, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.021654577945969e-05, | |
| "loss": 0.0632, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.0190265951855357e-05, | |
| "loss": 0.0235, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.0163986124251025e-05, | |
| "loss": 0.0332, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.0137706296646694e-05, | |
| "loss": 0.0091, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 3.0111426469042366e-05, | |
| "loss": 0.0224, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 3.0085146641438034e-05, | |
| "loss": 0.0069, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 3.0058866813833702e-05, | |
| "loss": 0.0048, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 3.003258698622937e-05, | |
| "loss": 0.0038, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "eval_f1": 0.992581772092419, | |
| "eval_f1_0": 0.9976360293746089, | |
| "eval_f1_1": 0.9976559952429364, | |
| "eval_f1_2": 0.9759631250352142, | |
| "eval_f1_3": 0.9990719387169165, | |
| "eval_loss": 0.01393364742398262, | |
| "eval_runtime": 53.2808, | |
| "eval_samples_per_second": 222.745, | |
| "eval_steps_per_second": 0.882, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 3.0006307158625043e-05, | |
| "loss": 0.0254, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 2.998002733102071e-05, | |
| "loss": 0.0098, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 2.995374750341638e-05, | |
| "loss": 0.0032, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 2.9927467675812048e-05, | |
| "loss": 0.0347, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 2.9901187848207716e-05, | |
| "loss": 0.0425, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 2.9874908020603388e-05, | |
| "loss": 0.0113, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 2.9848628192999056e-05, | |
| "loss": 0.0147, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 2.9822348365394725e-05, | |
| "loss": 0.0162, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 2.9796068537790393e-05, | |
| "loss": 0.003, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 2.9769788710186065e-05, | |
| "loss": 0.0043, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 2.9743508882581733e-05, | |
| "loss": 0.0419, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 2.9717229054977402e-05, | |
| "loss": 0.0136, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 2.969094922737307e-05, | |
| "loss": 0.0033, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 2.9664669399768742e-05, | |
| "loss": 0.0092, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 2.963838957216441e-05, | |
| "loss": 0.0058, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 2.961210974456008e-05, | |
| "loss": 0.0106, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 2.9585829916955747e-05, | |
| "loss": 0.0117, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 2.955955008935142e-05, | |
| "loss": 0.0127, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 2.9533270261747088e-05, | |
| "loss": 0.0265, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 2.9506990434142756e-05, | |
| "loss": 0.0076, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "eval_f1": 0.9915600607688022, | |
| "eval_f1_0": 0.99730177462593, | |
| "eval_f1_1": 0.9973903465849999, | |
| "eval_f1_2": 0.9726622709368042, | |
| "eval_f1_3": 0.9988858509274748, | |
| "eval_loss": 0.015386177226901054, | |
| "eval_runtime": 53.1887, | |
| "eval_samples_per_second": 223.13, | |
| "eval_steps_per_second": 0.884, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 2.948071060653842e-05, | |
| "loss": 0.0312, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 2.945443077893409e-05, | |
| "loss": 0.0111, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 2.9428150951329758e-05, | |
| "loss": 0.0195, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 2.9401871123725426e-05, | |
| "loss": 0.0067, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 2.9375591296121098e-05, | |
| "loss": 0.0074, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 2.9349311468516766e-05, | |
| "loss": 0.0023, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 2.9323031640912435e-05, | |
| "loss": 0.0057, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 2.9296751813308103e-05, | |
| "loss": 0.0023, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 2.9270471985703775e-05, | |
| "loss": 0.0024, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 2.9244192158099443e-05, | |
| "loss": 0.0092, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 2.9217912330495112e-05, | |
| "loss": 0.0227, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 2.919163250289078e-05, | |
| "loss": 0.0227, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 2.9165352675286452e-05, | |
| "loss": 0.0139, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 2.913907284768212e-05, | |
| "loss": 0.0138, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 2.911279302007779e-05, | |
| "loss": 0.0035, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 2.9086513192473457e-05, | |
| "loss": 0.003, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 2.9060233364869126e-05, | |
| "loss": 0.0088, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 2.9033953537264797e-05, | |
| "loss": 0.0132, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 2.9007673709660466e-05, | |
| "loss": 0.025, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 2.8981393882056134e-05, | |
| "loss": 0.0059, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "eval_f1": 0.9888104160718681, | |
| "eval_f1_0": 0.996429967486629, | |
| "eval_f1_1": 0.9980465417654738, | |
| "eval_f1_2": 0.9619597795354117, | |
| "eval_f1_3": 0.9988053754999581, | |
| "eval_loss": 0.021114500239491463, | |
| "eval_runtime": 53.2111, | |
| "eval_samples_per_second": 223.036, | |
| "eval_steps_per_second": 0.883, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 2.8955114054451803e-05, | |
| "loss": 0.0153, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 2.8928834226847474e-05, | |
| "loss": 0.021, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 2.8902554399243143e-05, | |
| "loss": 0.0072, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 2.887627457163881e-05, | |
| "loss": 0.0058, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 2.884999474403448e-05, | |
| "loss": 0.0048, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 2.882371491643015e-05, | |
| "loss": 0.0198, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 2.879743508882582e-05, | |
| "loss": 0.0062, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 2.8771155261221488e-05, | |
| "loss": 0.0024, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 2.8744875433617157e-05, | |
| "loss": 0.012, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 2.871859560601283e-05, | |
| "loss": 0.0047, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 2.8692315778408497e-05, | |
| "loss": 0.0029, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 2.8666035950804165e-05, | |
| "loss": 0.0566, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 2.8639756123199834e-05, | |
| "loss": 0.0027, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 2.8613476295595505e-05, | |
| "loss": 0.0017, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 2.8587196467991174e-05, | |
| "loss": 0.0018, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 2.8560916640386842e-05, | |
| "loss": 0.0016, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 2.853463681278251e-05, | |
| "loss": 0.0131, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 2.850835698517818e-05, | |
| "loss": 0.0104, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 2.848207715757385e-05, | |
| "loss": 0.0016, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 2.845579732996952e-05, | |
| "loss": 0.0143, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "eval_f1": 0.992309982102097, | |
| "eval_f1_0": 0.9975932931756409, | |
| "eval_f1_1": 0.9982769954941229, | |
| "eval_f1_2": 0.9744498013904024, | |
| "eval_f1_3": 0.9989198383482217, | |
| "eval_loss": 0.013883223757147789, | |
| "eval_runtime": 53.1516, | |
| "eval_samples_per_second": 223.286, | |
| "eval_steps_per_second": 0.884, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 2.8429517502365188e-05, | |
| "loss": 0.0019, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 2.8403237674760856e-05, | |
| "loss": 0.0101, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 2.8376957847156528e-05, | |
| "loss": 0.002, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 2.835067801955219e-05, | |
| "loss": 0.0161, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 2.8324398191947858e-05, | |
| "loss": 0.0182, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 2.829811836434353e-05, | |
| "loss": 0.0153, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 2.8271838536739198e-05, | |
| "loss": 0.0102, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 2.8245558709134867e-05, | |
| "loss": 0.0021, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 2.8219278881530535e-05, | |
| "loss": 0.0038, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 2.8192999053926207e-05, | |
| "loss": 0.0119, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 2.8166719226321875e-05, | |
| "loss": 0.0234, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 2.8140439398717544e-05, | |
| "loss": 0.0144, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 2.8114159571113212e-05, | |
| "loss": 0.0058, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 2.8087879743508884e-05, | |
| "loss": 0.0023, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 2.8061599915904552e-05, | |
| "loss": 0.006, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 2.803532008830022e-05, | |
| "loss": 0.0049, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 2.800904026069589e-05, | |
| "loss": 0.0062, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 2.798276043309156e-05, | |
| "loss": 0.0052, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 2.795648060548723e-05, | |
| "loss": 0.0271, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.7930200777882898e-05, | |
| "loss": 0.025, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_f1": 0.9851190515011226, | |
| "eval_f1_0": 0.9951271173259356, | |
| "eval_f1_1": 0.997643322972136, | |
| "eval_f1_2": 0.9486835194800827, | |
| "eval_f1_3": 0.9990222462263361, | |
| "eval_loss": 0.028859291225671768, | |
| "eval_runtime": 53.2539, | |
| "eval_samples_per_second": 222.857, | |
| "eval_steps_per_second": 0.883, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.7903920950278566e-05, | |
| "loss": 0.0183, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.7877641122674238e-05, | |
| "loss": 0.0018, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.7851361295069906e-05, | |
| "loss": 0.0129, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.7825081467465575e-05, | |
| "loss": 0.002, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.7798801639861243e-05, | |
| "loss": 0.0016, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.777252181225691e-05, | |
| "loss": 0.004, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.7746241984652583e-05, | |
| "loss": 0.018, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.771996215704825e-05, | |
| "loss": 0.0204, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.769368232944392e-05, | |
| "loss": 0.0038, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.766740250183959e-05, | |
| "loss": 0.0214, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.764112267423526e-05, | |
| "loss": 0.0058, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.761484284663093e-05, | |
| "loss": 0.009, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.7588563019026597e-05, | |
| "loss": 0.0037, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.7562283191422265e-05, | |
| "loss": 0.0013, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.7536003363817937e-05, | |
| "loss": 0.0104, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.7509723536213606e-05, | |
| "loss": 0.0013, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.7483443708609274e-05, | |
| "loss": 0.0029, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.7457163881004942e-05, | |
| "loss": 0.0227, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.7430884053400614e-05, | |
| "loss": 0.008, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.7404604225796283e-05, | |
| "loss": 0.0036, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "eval_f1": 0.9923914591031513, | |
| "eval_f1_0": 0.997616570752836, | |
| "eval_f1_1": 0.9979790000076698, | |
| "eval_f1_2": 0.9749075484011311, | |
| "eval_f1_3": 0.9990627172509683, | |
| "eval_loss": 0.013414908200502396, | |
| "eval_runtime": 53.3061, | |
| "eval_samples_per_second": 222.639, | |
| "eval_steps_per_second": 0.882, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.737832439819195e-05, | |
| "loss": 0.002, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.735204457058762e-05, | |
| "loss": 0.0192, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.732576474298329e-05, | |
| "loss": 0.003, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 2.729948491537896e-05, | |
| "loss": 0.0056, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 2.7273205087774628e-05, | |
| "loss": 0.0165, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 2.7246925260170297e-05, | |
| "loss": 0.0099, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 2.722064543256596e-05, | |
| "loss": 0.0075, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 2.719436560496163e-05, | |
| "loss": 0.0019, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 2.71680857773573e-05, | |
| "loss": 0.0021, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 2.714180594975297e-05, | |
| "loss": 0.0109, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 2.711552612214864e-05, | |
| "loss": 0.023, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 2.7089246294544307e-05, | |
| "loss": 0.0027, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 2.7062966466939975e-05, | |
| "loss": 0.0035, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 2.7036686639335647e-05, | |
| "loss": 0.0036, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 2.7010406811731316e-05, | |
| "loss": 0.0024, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 2.6984126984126984e-05, | |
| "loss": 0.0049, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 2.6957847156522652e-05, | |
| "loss": 0.0185, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 2.693156732891832e-05, | |
| "loss": 0.0057, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 2.6905287501313993e-05, | |
| "loss": 0.0021, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 2.687900767370966e-05, | |
| "loss": 0.0028, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "eval_f1": 0.9918819873108211, | |
| "eval_f1_0": 0.9974759191679455, | |
| "eval_f1_1": 0.9981394867589323, | |
| "eval_f1_2": 0.9730079967192946, | |
| "eval_f1_3": 0.9989045465971121, | |
| "eval_loss": 0.01633334718644619, | |
| "eval_runtime": 53.2283, | |
| "eval_samples_per_second": 222.964, | |
| "eval_steps_per_second": 0.883, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 2.685272784610533e-05, | |
| "loss": 0.008, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 2.6826448018500998e-05, | |
| "loss": 0.0211, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 2.680016819089667e-05, | |
| "loss": 0.0201, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 2.6773888363292338e-05, | |
| "loss": 0.0162, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 2.6747608535688006e-05, | |
| "loss": 0.0107, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 2.6721328708083675e-05, | |
| "loss": 0.0047, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 2.6695048880479347e-05, | |
| "loss": 0.0052, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 2.6668769052875015e-05, | |
| "loss": 0.0069, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 2.6642489225270683e-05, | |
| "loss": 0.0029, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 2.6616209397666352e-05, | |
| "loss": 0.0019, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 2.6589929570062024e-05, | |
| "loss": 0.0098, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 2.6563649742457692e-05, | |
| "loss": 0.0133, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 2.653736991485336e-05, | |
| "loss": 0.0141, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 2.651109008724903e-05, | |
| "loss": 0.0761, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 2.64848102596447e-05, | |
| "loss": 0.0017, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 2.645853043204037e-05, | |
| "loss": 0.0135, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.6432250604436037e-05, | |
| "loss": 0.0088, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.6405970776831706e-05, | |
| "loss": 0.0071, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.6379690949227374e-05, | |
| "loss": 0.0033, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 2.6353411121623046e-05, | |
| "loss": 0.0038, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_f1": 0.9837313140464833, | |
| "eval_f1_0": 0.9951107926131856, | |
| "eval_f1_1": 0.9979246154406312, | |
| "eval_f1_2": 0.9429821948777298, | |
| "eval_f1_3": 0.9989076532543865, | |
| "eval_loss": 0.04059078171849251, | |
| "eval_runtime": 53.2583, | |
| "eval_samples_per_second": 222.839, | |
| "eval_steps_per_second": 0.882, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 2.6327131294018714e-05, | |
| "loss": 0.0319, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 2.6300851466414383e-05, | |
| "loss": 0.0336, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 2.627457163881005e-05, | |
| "loss": 0.0063, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 2.6248291811205723e-05, | |
| "loss": 0.009, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 2.622201198360139e-05, | |
| "loss": 0.0084, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 2.619573215599706e-05, | |
| "loss": 0.0014, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 2.6169452328392728e-05, | |
| "loss": 0.0065, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 2.61431725007884e-05, | |
| "loss": 0.0013, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 2.6116892673184062e-05, | |
| "loss": 0.0048, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 2.609061284557973e-05, | |
| "loss": 0.0073, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 2.6064333017975402e-05, | |
| "loss": 0.0082, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 2.603805319037107e-05, | |
| "loss": 0.0014, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 2.601177336276674e-05, | |
| "loss": 0.0076, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 2.5985493535162407e-05, | |
| "loss": 0.0092, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 2.595921370755808e-05, | |
| "loss": 0.021, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 2.5932933879953747e-05, | |
| "loss": 0.0036, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 2.5906654052349416e-05, | |
| "loss": 0.0022, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 2.5880374224745084e-05, | |
| "loss": 0.0199, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 2.5854094397140756e-05, | |
| "loss": 0.0033, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 2.5827814569536424e-05, | |
| "loss": 0.0121, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "eval_f1": 0.9884805553259847, | |
| "eval_f1_0": 0.9963046652881733, | |
| "eval_f1_1": 0.9979486825557499, | |
| "eval_f1_2": 0.960680592016051, | |
| "eval_f1_3": 0.9989882814439645, | |
| "eval_loss": 0.02228163555264473, | |
| "eval_runtime": 53.3261, | |
| "eval_samples_per_second": 222.555, | |
| "eval_steps_per_second": 0.881, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 2.5801534741932093e-05, | |
| "loss": 0.0025, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 2.577525491432776e-05, | |
| "loss": 0.0011, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 2.5748975086723433e-05, | |
| "loss": 0.0033, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 2.57226952591191e-05, | |
| "loss": 0.0029, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 2.569641543151477e-05, | |
| "loss": 0.0118, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 2.5670135603910438e-05, | |
| "loss": 0.0021, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 2.5643855776306107e-05, | |
| "loss": 0.0129, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 2.561757594870178e-05, | |
| "loss": 0.0025, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 2.5591296121097447e-05, | |
| "loss": 0.0318, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 2.5565016293493115e-05, | |
| "loss": 0.0046, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 2.5538736465888784e-05, | |
| "loss": 0.0038, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 2.5512456638284455e-05, | |
| "loss": 0.0114, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 2.5486176810680124e-05, | |
| "loss": 0.0024, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 2.5459896983075792e-05, | |
| "loss": 0.0026, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 2.543361715547146e-05, | |
| "loss": 0.0019, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 2.5407337327867132e-05, | |
| "loss": 0.0057, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 2.53810575002628e-05, | |
| "loss": 0.0012, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 2.535477767265847e-05, | |
| "loss": 0.0109, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 2.5328497845054138e-05, | |
| "loss": 0.0036, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 2.530221801744981e-05, | |
| "loss": 0.0029, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "eval_f1": 0.9915827182707018, | |
| "eval_f1_0": 0.9973477738948522, | |
| "eval_f1_1": 0.9979634260466687, | |
| "eval_f1_2": 0.9723783772229823, | |
| "eval_f1_3": 0.9986412959183041, | |
| "eval_loss": 0.019460231065750122, | |
| "eval_runtime": 53.2808, | |
| "eval_samples_per_second": 222.744, | |
| "eval_steps_per_second": 0.882, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 2.5275938189845478e-05, | |
| "loss": 0.0022, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 2.5249658362241146e-05, | |
| "loss": 0.0035, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 2.5223378534636815e-05, | |
| "loss": 0.0017, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 2.5197098707032486e-05, | |
| "loss": 0.005, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 2.5170818879428155e-05, | |
| "loss": 0.003, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 2.5144539051823823e-05, | |
| "loss": 0.02, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 2.511825922421949e-05, | |
| "loss": 0.0036, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 2.509197939661516e-05, | |
| "loss": 0.0013, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 2.5065699569010832e-05, | |
| "loss": 0.0015, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 2.50394197414065e-05, | |
| "loss": 0.0028, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 2.501313991380217e-05, | |
| "loss": 0.0026, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 2.4986860086197837e-05, | |
| "loss": 0.0037, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 2.4960580258593506e-05, | |
| "loss": 0.0564, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 2.4934300430989174e-05, | |
| "loss": 0.0096, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 2.4908020603384842e-05, | |
| "loss": 0.0016, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 2.4881740775780514e-05, | |
| "loss": 0.0052, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 2.4855460948176183e-05, | |
| "loss": 0.0012, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 2.482918112057185e-05, | |
| "loss": 0.0031, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 2.480290129296752e-05, | |
| "loss": 0.0028, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 2.477662146536319e-05, | |
| "loss": 0.0019, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "eval_f1": 0.9911705258446226, | |
| "eval_f1_0": 0.9972116420434848, | |
| "eval_f1_1": 0.9983713230222113, | |
| "eval_f1_2": 0.9701202907562665, | |
| "eval_f1_3": 0.9989788475565281, | |
| "eval_loss": 0.02063840441405773, | |
| "eval_runtime": 53.403, | |
| "eval_samples_per_second": 222.235, | |
| "eval_steps_per_second": 0.88, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "step": 10600, | |
| "total_flos": 2.1233219239359283e+17, | |
| "train_loss": 0.03157895615913044, | |
| "train_runtime": 7385.7528, | |
| "train_samples_per_second": 86.764, | |
| "train_steps_per_second": 2.712 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 20028, | |
| "num_train_epochs": 6, | |
| "save_steps": 200, | |
| "total_flos": 2.1233219239359283e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |