| { |
| "best_metric": 0.05866290256381035, |
| "best_model_checkpoint": "ViT_Flower102/checkpoint-2800", |
| "epoch": 10.0, |
| "eval_steps": 100, |
| "global_step": 4490, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "grad_norm": 3.8268136978149414, |
| "learning_rate": 0.00019955456570155904, |
| "loss": 4.7201, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 3.8958423137664795, |
| "learning_rate": 0.00019910913140311804, |
| "loss": 4.1076, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 3.8826911449432373, |
| "learning_rate": 0.00019866369710467706, |
| "loss": 3.6825, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 3.5749828815460205, |
| "learning_rate": 0.0001982182628062361, |
| "loss": 3.2094, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 3.417900562286377, |
| "learning_rate": 0.00019777282850779511, |
| "loss": 2.7548, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 3.6577537059783936, |
| "learning_rate": 0.00019732739420935414, |
| "loss": 2.3203, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 3.3995978832244873, |
| "learning_rate": 0.00019688195991091317, |
| "loss": 2.1239, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 3.805372953414917, |
| "learning_rate": 0.00019643652561247217, |
| "loss": 1.8203, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 3.9247817993164062, |
| "learning_rate": 0.0001959910913140312, |
| "loss": 1.5999, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 4.586317539215088, |
| "learning_rate": 0.0001955456570155902, |
| "loss": 1.3962, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.22, |
| "eval_accuracy": 0.7372549019607844, |
| "eval_f1": 0.7372549019607844, |
| "eval_loss": 1.7861285209655762, |
| "eval_precision": 0.7372549019607844, |
| "eval_recall": 0.7372549019607844, |
| "eval_runtime": 18.6216, |
| "eval_samples_per_second": 54.775, |
| "eval_steps_per_second": 6.874, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 3.271233558654785, |
| "learning_rate": 0.00019510022271714922, |
| "loss": 1.2936, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 3.2348804473876953, |
| "learning_rate": 0.00019465478841870825, |
| "loss": 1.1509, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 3.311753034591675, |
| "learning_rate": 0.00019420935412026727, |
| "loss": 1.0911, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.8280277252197266, |
| "learning_rate": 0.0001937639198218263, |
| "loss": 0.8982, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 2.301532506942749, |
| "learning_rate": 0.00019331848552338533, |
| "loss": 0.8682, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 6.380126476287842, |
| "learning_rate": 0.00019287305122494432, |
| "loss": 0.9249, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 4.599637985229492, |
| "learning_rate": 0.00019242761692650335, |
| "loss": 0.663, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.579926013946533, |
| "learning_rate": 0.00019198218262806238, |
| "loss": 0.5836, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 2.810037612915039, |
| "learning_rate": 0.00019153674832962138, |
| "loss": 0.5488, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 3.790639638900757, |
| "learning_rate": 0.0001910913140311804, |
| "loss": 0.5196, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.45, |
| "eval_accuracy": 0.8950980392156863, |
| "eval_f1": 0.8950980392156863, |
| "eval_loss": 0.7526528239250183, |
| "eval_precision": 0.8950980392156863, |
| "eval_recall": 0.8950980392156863, |
| "eval_runtime": 16.5095, |
| "eval_samples_per_second": 61.783, |
| "eval_steps_per_second": 7.753, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 4.308547496795654, |
| "learning_rate": 0.00019064587973273943, |
| "loss": 0.4415, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 2.616471290588379, |
| "learning_rate": 0.00019020044543429846, |
| "loss": 0.6096, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 2.0455899238586426, |
| "learning_rate": 0.00018975501113585748, |
| "loss": 0.5266, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 2.2380969524383545, |
| "learning_rate": 0.00018930957683741648, |
| "loss": 0.4699, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 2.4067838191986084, |
| "learning_rate": 0.0001888641425389755, |
| "loss": 0.3705, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 2.8906757831573486, |
| "learning_rate": 0.00018841870824053454, |
| "loss": 0.3289, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.221278667449951, |
| "learning_rate": 0.00018797327394209353, |
| "loss": 0.3145, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.2921174764633179, |
| "learning_rate": 0.00018752783964365256, |
| "loss": 0.2449, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.6907320618629456, |
| "learning_rate": 0.0001870824053452116, |
| "loss": 0.2288, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 4.547955513000488, |
| "learning_rate": 0.00018663697104677061, |
| "loss": 0.355, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.67, |
| "eval_accuracy": 0.9450980392156862, |
| "eval_f1": 0.9450980392156862, |
| "eval_loss": 0.39370954036712646, |
| "eval_precision": 0.9450980392156862, |
| "eval_recall": 0.9450980392156862, |
| "eval_runtime": 16.7113, |
| "eval_samples_per_second": 61.037, |
| "eval_steps_per_second": 7.659, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 3.801025390625, |
| "learning_rate": 0.00018619153674832964, |
| "loss": 0.3294, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 4.912439823150635, |
| "learning_rate": 0.00018574610244988867, |
| "loss": 0.378, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 4.292325496673584, |
| "learning_rate": 0.00018530066815144767, |
| "loss": 0.3388, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 3.0897388458251953, |
| "learning_rate": 0.0001848552338530067, |
| "loss": 0.3562, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.37573301792144775, |
| "learning_rate": 0.00018440979955456572, |
| "loss": 0.1474, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.6090143918991089, |
| "learning_rate": 0.00018396436525612472, |
| "loss": 0.2014, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 2.0324394702911377, |
| "learning_rate": 0.00018351893095768375, |
| "loss": 0.2915, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 3.8301498889923096, |
| "learning_rate": 0.00018307349665924277, |
| "loss": 0.2966, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 2.9274563789367676, |
| "learning_rate": 0.0001826280623608018, |
| "loss": 0.2235, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.3997691869735718, |
| "learning_rate": 0.00018218262806236082, |
| "loss": 0.1966, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.89, |
| "eval_accuracy": 0.942156862745098, |
| "eval_f1": 0.942156862745098, |
| "eval_loss": 0.3312070369720459, |
| "eval_precision": 0.942156862745098, |
| "eval_recall": 0.942156862745098, |
| "eval_runtime": 16.6507, |
| "eval_samples_per_second": 61.259, |
| "eval_steps_per_second": 7.687, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.6399450302124023, |
| "learning_rate": 0.00018173719376391982, |
| "loss": 0.2518, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 5.330190658569336, |
| "learning_rate": 0.00018129175946547885, |
| "loss": 0.1769, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 3.7638723850250244, |
| "learning_rate": 0.00018084632516703788, |
| "loss": 0.2792, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.6987600326538086, |
| "learning_rate": 0.00018040089086859688, |
| "loss": 0.1728, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.16931219398975372, |
| "learning_rate": 0.0001799554565701559, |
| "loss": 0.091, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.02, |
| "grad_norm": 4.015316009521484, |
| "learning_rate": 0.00017951002227171493, |
| "loss": 0.1133, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 2.7753520011901855, |
| "learning_rate": 0.00017906458797327396, |
| "loss": 0.1541, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.07, |
| "grad_norm": 1.3259629011154175, |
| "learning_rate": 0.00017861915367483298, |
| "loss": 0.0757, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.09, |
| "grad_norm": 0.1445445716381073, |
| "learning_rate": 0.000178173719376392, |
| "loss": 0.0574, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.11, |
| "grad_norm": 5.162754058837891, |
| "learning_rate": 0.000177728285077951, |
| "loss": 0.1262, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.11, |
| "eval_accuracy": 0.9607843137254902, |
| "eval_f1": 0.9607843137254902, |
| "eval_loss": 0.2145027369260788, |
| "eval_precision": 0.9607843137254902, |
| "eval_recall": 0.9607843137254902, |
| "eval_runtime": 16.8511, |
| "eval_samples_per_second": 60.53, |
| "eval_steps_per_second": 7.596, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.14, |
| "grad_norm": 0.1372026801109314, |
| "learning_rate": 0.00017728285077951003, |
| "loss": 0.0445, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 0.07345832884311676, |
| "learning_rate": 0.00017683741648106903, |
| "loss": 0.0362, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.18, |
| "grad_norm": 0.13113035261631012, |
| "learning_rate": 0.00017639198218262806, |
| "loss": 0.0587, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.47994276881217957, |
| "learning_rate": 0.0001759465478841871, |
| "loss": 0.0555, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.22, |
| "grad_norm": 0.19476382434368134, |
| "learning_rate": 0.00017550111358574611, |
| "loss": 0.062, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.8010620474815369, |
| "learning_rate": 0.00017505567928730514, |
| "loss": 0.0392, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.27, |
| "grad_norm": 1.4072810411453247, |
| "learning_rate": 0.00017461024498886417, |
| "loss": 0.0797, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.29, |
| "grad_norm": 1.0842857360839844, |
| "learning_rate": 0.00017416481069042317, |
| "loss": 0.0247, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.31, |
| "grad_norm": 0.0485902801156044, |
| "learning_rate": 0.0001737193763919822, |
| "loss": 0.0269, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.34, |
| "grad_norm": 0.2984197735786438, |
| "learning_rate": 0.00017327394209354122, |
| "loss": 0.1512, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.34, |
| "eval_accuracy": 0.9705882352941176, |
| "eval_f1": 0.9705882352941176, |
| "eval_loss": 0.16516022384166718, |
| "eval_precision": 0.9705882352941176, |
| "eval_recall": 0.9705882352941176, |
| "eval_runtime": 16.9645, |
| "eval_samples_per_second": 60.126, |
| "eval_steps_per_second": 7.545, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.36, |
| "grad_norm": 7.004275321960449, |
| "learning_rate": 0.00017282850779510022, |
| "loss": 0.1144, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.38, |
| "grad_norm": 2.13541579246521, |
| "learning_rate": 0.00017238307349665924, |
| "loss": 0.058, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 2.4369468688964844, |
| "learning_rate": 0.00017193763919821827, |
| "loss": 0.0779, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.43, |
| "grad_norm": 0.1981283277273178, |
| "learning_rate": 0.0001714922048997773, |
| "loss": 0.0754, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 0.08032400906085968, |
| "learning_rate": 0.00017104677060133632, |
| "loss": 0.0796, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.47, |
| "grad_norm": 0.40804997086524963, |
| "learning_rate": 0.00017060133630289532, |
| "loss": 0.0449, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.49, |
| "grad_norm": 0.05186638608574867, |
| "learning_rate": 0.00017015590200445435, |
| "loss": 0.1531, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.51, |
| "grad_norm": 1.0731438398361206, |
| "learning_rate": 0.00016971046770601338, |
| "loss": 0.0788, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.54, |
| "grad_norm": 0.0614326074719429, |
| "learning_rate": 0.00016926503340757238, |
| "loss": 0.1072, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 0.03861929103732109, |
| "learning_rate": 0.0001688195991091314, |
| "loss": 0.1414, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.56, |
| "eval_accuracy": 0.9470588235294117, |
| "eval_f1": 0.9470588235294117, |
| "eval_loss": 0.25619158148765564, |
| "eval_precision": 0.9470588235294117, |
| "eval_recall": 0.9470588235294117, |
| "eval_runtime": 17.0703, |
| "eval_samples_per_second": 59.753, |
| "eval_steps_per_second": 7.498, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.58, |
| "grad_norm": 0.042193703353405, |
| "learning_rate": 0.00016837416481069043, |
| "loss": 0.1056, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 12.006841659545898, |
| "learning_rate": 0.00016792873051224946, |
| "loss": 0.1301, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.63, |
| "grad_norm": 0.5111542344093323, |
| "learning_rate": 0.00016748329621380848, |
| "loss": 0.0599, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 2.5436642169952393, |
| "learning_rate": 0.0001670378619153675, |
| "loss": 0.034, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.67, |
| "grad_norm": 0.12912799417972565, |
| "learning_rate": 0.0001665924276169265, |
| "loss": 0.0423, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.69, |
| "grad_norm": 0.3576439917087555, |
| "learning_rate": 0.00016614699331848553, |
| "loss": 0.0732, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.71, |
| "grad_norm": 0.39783817529678345, |
| "learning_rate": 0.00016570155902004456, |
| "loss": 0.0676, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.74, |
| "grad_norm": 2.669060468673706, |
| "learning_rate": 0.00016525612472160356, |
| "loss": 0.0704, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 0.060641657561063766, |
| "learning_rate": 0.0001648106904231626, |
| "loss": 0.0387, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.78, |
| "grad_norm": 0.08565088361501694, |
| "learning_rate": 0.0001643652561247216, |
| "loss": 0.1235, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.78, |
| "eval_accuracy": 0.9656862745098039, |
| "eval_f1": 0.9656862745098039, |
| "eval_loss": 0.17419515550136566, |
| "eval_precision": 0.9656862745098039, |
| "eval_recall": 0.9656862745098039, |
| "eval_runtime": 17.2172, |
| "eval_samples_per_second": 59.243, |
| "eval_steps_per_second": 7.434, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 4.969184875488281, |
| "learning_rate": 0.00016391982182628064, |
| "loss": 0.1165, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.83, |
| "grad_norm": 1.115045428276062, |
| "learning_rate": 0.00016347438752783967, |
| "loss": 0.1023, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 0.0672130435705185, |
| "learning_rate": 0.00016302895322939867, |
| "loss": 0.0921, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.87, |
| "grad_norm": 0.11427325010299683, |
| "learning_rate": 0.0001625835189309577, |
| "loss": 0.0202, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.89, |
| "grad_norm": 0.08146195858716965, |
| "learning_rate": 0.00016213808463251672, |
| "loss": 0.0906, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 0.06222369149327278, |
| "learning_rate": 0.00016169265033407572, |
| "loss": 0.1457, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.94, |
| "grad_norm": 0.06050724536180496, |
| "learning_rate": 0.00016124721603563474, |
| "loss": 0.0185, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 0.33284619450569153, |
| "learning_rate": 0.00016080178173719377, |
| "loss": 0.1351, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.98, |
| "grad_norm": 3.9659552574157715, |
| "learning_rate": 0.0001603563474387528, |
| "loss": 0.1317, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.04289441928267479, |
| "learning_rate": 0.00015991091314031182, |
| "loss": 0.0428, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.957843137254902, |
| "eval_f1": 0.957843137254902, |
| "eval_loss": 0.18091771006584167, |
| "eval_precision": 0.957843137254902, |
| "eval_recall": 0.957843137254902, |
| "eval_runtime": 17.3889, |
| "eval_samples_per_second": 58.658, |
| "eval_steps_per_second": 7.361, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.03, |
| "grad_norm": 7.464574337005615, |
| "learning_rate": 0.00015946547884187085, |
| "loss": 0.0544, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.05, |
| "grad_norm": 0.03491633012890816, |
| "learning_rate": 0.00015902004454342985, |
| "loss": 0.0221, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.07, |
| "grad_norm": 0.027076447382569313, |
| "learning_rate": 0.00015857461024498888, |
| "loss": 0.0293, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.09, |
| "grad_norm": 0.39794862270355225, |
| "learning_rate": 0.0001581291759465479, |
| "loss": 0.0279, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.12, |
| "grad_norm": 0.0606105700135231, |
| "learning_rate": 0.0001576837416481069, |
| "loss": 0.0113, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.14, |
| "grad_norm": 3.0092263221740723, |
| "learning_rate": 0.00015723830734966593, |
| "loss": 0.0189, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.16, |
| "grad_norm": 0.02015094831585884, |
| "learning_rate": 0.00015679287305122495, |
| "loss": 0.0453, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.18, |
| "grad_norm": 0.7719711065292358, |
| "learning_rate": 0.00015634743875278398, |
| "loss": 0.1122, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.28093260526657104, |
| "learning_rate": 0.000155902004454343, |
| "loss": 0.052, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.23, |
| "grad_norm": 1.1967350244522095, |
| "learning_rate": 0.000155456570155902, |
| "loss": 0.0202, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.23, |
| "eval_accuracy": 0.9637254901960784, |
| "eval_f1": 0.9637254901960784, |
| "eval_loss": 0.15183669328689575, |
| "eval_precision": 0.9637254901960784, |
| "eval_recall": 0.9637254901960784, |
| "eval_runtime": 16.9649, |
| "eval_samples_per_second": 60.124, |
| "eval_steps_per_second": 7.545, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 0.05678591504693031, |
| "learning_rate": 0.00015501113585746103, |
| "loss": 0.037, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.27, |
| "grad_norm": 2.0827012062072754, |
| "learning_rate": 0.00015456570155902006, |
| "loss": 0.1636, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.29, |
| "grad_norm": 0.03390209749341011, |
| "learning_rate": 0.00015412026726057906, |
| "loss": 0.0298, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 0.15837769210338593, |
| "learning_rate": 0.00015367483296213809, |
| "loss": 0.0168, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.34, |
| "grad_norm": 0.018268901854753494, |
| "learning_rate": 0.0001532293986636971, |
| "loss": 0.0561, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.36, |
| "grad_norm": 0.1429576724767685, |
| "learning_rate": 0.00015278396436525614, |
| "loss": 0.0073, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.38, |
| "grad_norm": 0.228489488363266, |
| "learning_rate": 0.00015233853006681517, |
| "loss": 0.0109, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.41, |
| "grad_norm": 0.03329596295952797, |
| "learning_rate": 0.0001518930957683742, |
| "loss": 0.0566, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.43, |
| "grad_norm": 0.015951553359627724, |
| "learning_rate": 0.0001514476614699332, |
| "loss": 0.0057, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.45, |
| "grad_norm": 0.04213535785675049, |
| "learning_rate": 0.00015100222717149222, |
| "loss": 0.0451, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.45, |
| "eval_accuracy": 0.9725490196078431, |
| "eval_f1": 0.9725490196078431, |
| "eval_loss": 0.12136277556419373, |
| "eval_precision": 0.9725490196078431, |
| "eval_recall": 0.9725490196078431, |
| "eval_runtime": 16.9793, |
| "eval_samples_per_second": 60.073, |
| "eval_steps_per_second": 7.539, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.47, |
| "grad_norm": 0.639216959476471, |
| "learning_rate": 0.00015055679287305122, |
| "loss": 0.0076, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.49, |
| "grad_norm": 0.10043849796056747, |
| "learning_rate": 0.00015011135857461024, |
| "loss": 0.0536, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.52, |
| "grad_norm": 0.10532275587320328, |
| "learning_rate": 0.00014966592427616927, |
| "loss": 0.0049, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.54, |
| "grad_norm": 0.04534973204135895, |
| "learning_rate": 0.0001492204899777283, |
| "loss": 0.0071, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.56, |
| "grad_norm": 0.020227691158652306, |
| "learning_rate": 0.00014877505567928732, |
| "loss": 0.006, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.58, |
| "grad_norm": 0.09551525861024857, |
| "learning_rate": 0.00014832962138084635, |
| "loss": 0.0129, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.61, |
| "grad_norm": 3.5077669620513916, |
| "learning_rate": 0.00014788418708240535, |
| "loss": 0.0172, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.63, |
| "grad_norm": 0.021929070353507996, |
| "learning_rate": 0.00014743875278396438, |
| "loss": 0.0096, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.65, |
| "grad_norm": 0.23107671737670898, |
| "learning_rate": 0.0001469933184855234, |
| "loss": 0.032, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.67, |
| "grad_norm": 0.021303491666913033, |
| "learning_rate": 0.0001465478841870824, |
| "loss": 0.0208, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.67, |
| "eval_accuracy": 0.9725490196078431, |
| "eval_f1": 0.9725490196078431, |
| "eval_loss": 0.12741123139858246, |
| "eval_precision": 0.9725490196078431, |
| "eval_recall": 0.9725490196078431, |
| "eval_runtime": 16.6662, |
| "eval_samples_per_second": 61.202, |
| "eval_steps_per_second": 7.68, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.69, |
| "grad_norm": 0.03139738738536835, |
| "learning_rate": 0.00014610244988864143, |
| "loss": 0.0285, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.72, |
| "grad_norm": 0.137286975979805, |
| "learning_rate": 0.00014565701559020045, |
| "loss": 0.0132, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.74, |
| "grad_norm": 0.020747728645801544, |
| "learning_rate": 0.00014521158129175948, |
| "loss": 0.0499, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.76, |
| "grad_norm": 0.039346180856227875, |
| "learning_rate": 0.0001447661469933185, |
| "loss": 0.0229, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.78, |
| "grad_norm": 6.306964874267578, |
| "learning_rate": 0.0001443207126948775, |
| "loss": 0.0368, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.81, |
| "grad_norm": 0.015782644972205162, |
| "learning_rate": 0.00014387527839643653, |
| "loss": 0.0083, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.83, |
| "grad_norm": 0.03363679349422455, |
| "learning_rate": 0.00014342984409799556, |
| "loss": 0.0216, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.85, |
| "grad_norm": 0.016568714752793312, |
| "learning_rate": 0.00014298440979955456, |
| "loss": 0.0512, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.87, |
| "grad_norm": 0.03141006454825401, |
| "learning_rate": 0.00014253897550111359, |
| "loss": 0.0161, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.9, |
| "grad_norm": 1.3684362173080444, |
| "learning_rate": 0.0001420935412026726, |
| "loss": 0.0673, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.9, |
| "eval_accuracy": 0.9627450980392157, |
| "eval_f1": 0.9627450980392157, |
| "eval_loss": 0.19035381078720093, |
| "eval_precision": 0.9627450980392157, |
| "eval_recall": 0.9627450980392157, |
| "eval_runtime": 16.7181, |
| "eval_samples_per_second": 61.012, |
| "eval_steps_per_second": 7.656, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.92, |
| "grad_norm": 0.18714672327041626, |
| "learning_rate": 0.00014164810690423164, |
| "loss": 0.0617, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.94, |
| "grad_norm": 0.12209412455558777, |
| "learning_rate": 0.00014120267260579067, |
| "loss": 0.0111, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.96, |
| "grad_norm": 0.029331212863326073, |
| "learning_rate": 0.0001407572383073497, |
| "loss": 0.0145, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.98, |
| "grad_norm": 9.576412200927734, |
| "learning_rate": 0.0001403118040089087, |
| "loss": 0.0467, |
| "step": 1340 |
| }, |
| { |
| "epoch": 3.01, |
| "grad_norm": 0.016860289499163628, |
| "learning_rate": 0.00013986636971046772, |
| "loss": 0.0055, |
| "step": 1350 |
| }, |
| { |
| "epoch": 3.03, |
| "grad_norm": 0.016643529757857323, |
| "learning_rate": 0.00013942093541202674, |
| "loss": 0.0111, |
| "step": 1360 |
| }, |
| { |
| "epoch": 3.05, |
| "grad_norm": 0.043813981115818024, |
| "learning_rate": 0.00013897550111358574, |
| "loss": 0.0385, |
| "step": 1370 |
| }, |
| { |
| "epoch": 3.07, |
| "grad_norm": 0.013489973731338978, |
| "learning_rate": 0.00013853006681514477, |
| "loss": 0.0041, |
| "step": 1380 |
| }, |
| { |
| "epoch": 3.1, |
| "grad_norm": 0.01467512734234333, |
| "learning_rate": 0.00013808463251670377, |
| "loss": 0.0642, |
| "step": 1390 |
| }, |
| { |
| "epoch": 3.12, |
| "grad_norm": 0.015836119651794434, |
| "learning_rate": 0.00013763919821826282, |
| "loss": 0.0347, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.12, |
| "eval_accuracy": 0.9764705882352941, |
| "eval_f1": 0.9764705882352941, |
| "eval_loss": 0.11006435751914978, |
| "eval_precision": 0.9764705882352941, |
| "eval_recall": 0.9764705882352941, |
| "eval_runtime": 16.8474, |
| "eval_samples_per_second": 60.543, |
| "eval_steps_per_second": 7.598, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.14, |
| "grad_norm": 0.034053388983011246, |
| "learning_rate": 0.00013719376391982185, |
| "loss": 0.0064, |
| "step": 1410 |
| }, |
| { |
| "epoch": 3.16, |
| "grad_norm": 0.06026717275381088, |
| "learning_rate": 0.00013674832962138085, |
| "loss": 0.0612, |
| "step": 1420 |
| }, |
| { |
| "epoch": 3.18, |
| "grad_norm": 0.013388673774898052, |
| "learning_rate": 0.00013630289532293988, |
| "loss": 0.0172, |
| "step": 1430 |
| }, |
| { |
| "epoch": 3.21, |
| "grad_norm": 0.0726642981171608, |
| "learning_rate": 0.0001358574610244989, |
| "loss": 0.009, |
| "step": 1440 |
| }, |
| { |
| "epoch": 3.23, |
| "grad_norm": 1.1316585540771484, |
| "learning_rate": 0.0001354120267260579, |
| "loss": 0.0772, |
| "step": 1450 |
| }, |
| { |
| "epoch": 3.25, |
| "grad_norm": 0.3327818512916565, |
| "learning_rate": 0.00013496659242761693, |
| "loss": 0.0136, |
| "step": 1460 |
| }, |
| { |
| "epoch": 3.27, |
| "grad_norm": 0.03998925909399986, |
| "learning_rate": 0.00013452115812917595, |
| "loss": 0.0058, |
| "step": 1470 |
| }, |
| { |
| "epoch": 3.3, |
| "grad_norm": 0.010353831574320793, |
| "learning_rate": 0.00013407572383073498, |
| "loss": 0.0277, |
| "step": 1480 |
| }, |
| { |
| "epoch": 3.32, |
| "grad_norm": 0.0170371625572443, |
| "learning_rate": 0.000133630289532294, |
| "loss": 0.015, |
| "step": 1490 |
| }, |
| { |
| "epoch": 3.34, |
| "grad_norm": 0.014920663088560104, |
| "learning_rate": 0.00013318485523385303, |
| "loss": 0.0035, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.34, |
| "eval_accuracy": 0.9764705882352941, |
| "eval_f1": 0.9764705882352941, |
| "eval_loss": 0.1274050772190094, |
| "eval_precision": 0.9764705882352941, |
| "eval_recall": 0.9764705882352941, |
| "eval_runtime": 16.7515, |
| "eval_samples_per_second": 60.89, |
| "eval_steps_per_second": 7.641, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.36, |
| "grad_norm": 0.022045228630304337, |
| "learning_rate": 0.00013273942093541203, |
| "loss": 0.0825, |
| "step": 1510 |
| }, |
| { |
| "epoch": 3.39, |
| "grad_norm": 0.020985079929232597, |
| "learning_rate": 0.00013229398663697106, |
| "loss": 0.0045, |
| "step": 1520 |
| }, |
| { |
| "epoch": 3.41, |
| "grad_norm": 0.016269657760858536, |
| "learning_rate": 0.00013184855233853006, |
| "loss": 0.0041, |
| "step": 1530 |
| }, |
| { |
| "epoch": 3.43, |
| "grad_norm": 0.011222785338759422, |
| "learning_rate": 0.00013140311804008909, |
| "loss": 0.0035, |
| "step": 1540 |
| }, |
| { |
| "epoch": 3.45, |
| "grad_norm": 0.015975764021277428, |
| "learning_rate": 0.0001309576837416481, |
| "loss": 0.0037, |
| "step": 1550 |
| }, |
| { |
| "epoch": 3.47, |
| "grad_norm": 0.009478983469307423, |
| "learning_rate": 0.0001305122494432071, |
| "loss": 0.0028, |
| "step": 1560 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 0.019075946882367134, |
| "learning_rate": 0.00013006681514476616, |
| "loss": 0.0025, |
| "step": 1570 |
| }, |
| { |
| "epoch": 3.52, |
| "grad_norm": 0.013682669959962368, |
| "learning_rate": 0.0001296213808463252, |
| "loss": 0.0025, |
| "step": 1580 |
| }, |
| { |
| "epoch": 3.54, |
| "grad_norm": 0.07036701589822769, |
| "learning_rate": 0.0001291759465478842, |
| "loss": 0.0027, |
| "step": 1590 |
| }, |
| { |
| "epoch": 3.56, |
| "grad_norm": 0.008111678995192051, |
| "learning_rate": 0.00012873051224944322, |
| "loss": 0.0629, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.56, |
| "eval_accuracy": 0.9833333333333333, |
| "eval_f1": 0.9833333333333333, |
| "eval_loss": 0.07429105043411255, |
| "eval_precision": 0.9833333333333333, |
| "eval_recall": 0.9833333333333333, |
| "eval_runtime": 16.8989, |
| "eval_samples_per_second": 60.359, |
| "eval_steps_per_second": 7.574, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.59, |
| "grad_norm": 0.01713545434176922, |
| "learning_rate": 0.00012828507795100224, |
| "loss": 0.0026, |
| "step": 1610 |
| }, |
| { |
| "epoch": 3.61, |
| "grad_norm": 0.20383290946483612, |
| "learning_rate": 0.00012783964365256124, |
| "loss": 0.0033, |
| "step": 1620 |
| }, |
| { |
| "epoch": 3.63, |
| "grad_norm": 0.007501365151256323, |
| "learning_rate": 0.00012739420935412027, |
| "loss": 0.0023, |
| "step": 1630 |
| }, |
| { |
| "epoch": 3.65, |
| "grad_norm": 0.02385942079126835, |
| "learning_rate": 0.0001269487750556793, |
| "loss": 0.0031, |
| "step": 1640 |
| }, |
| { |
| "epoch": 3.67, |
| "grad_norm": 0.015214544720947742, |
| "learning_rate": 0.00012650334075723832, |
| "loss": 0.0021, |
| "step": 1650 |
| }, |
| { |
| "epoch": 3.7, |
| "grad_norm": 0.015637781471014023, |
| "learning_rate": 0.00012605790645879735, |
| "loss": 0.0029, |
| "step": 1660 |
| }, |
| { |
| "epoch": 3.72, |
| "grad_norm": 0.040307339280843735, |
| "learning_rate": 0.00012561247216035635, |
| "loss": 0.0025, |
| "step": 1670 |
| }, |
| { |
| "epoch": 3.74, |
| "grad_norm": 0.006837761495262384, |
| "learning_rate": 0.00012516703786191537, |
| "loss": 0.0025, |
| "step": 1680 |
| }, |
| { |
| "epoch": 3.76, |
| "grad_norm": 0.011093956418335438, |
| "learning_rate": 0.0001247216035634744, |
| "loss": 0.0033, |
| "step": 1690 |
| }, |
| { |
| "epoch": 3.79, |
| "grad_norm": 0.010432664304971695, |
| "learning_rate": 0.0001242761692650334, |
| "loss": 0.0368, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.79, |
| "eval_accuracy": 0.9803921568627451, |
| "eval_f1": 0.9803921568627451, |
| "eval_loss": 0.08009134232997894, |
| "eval_precision": 0.9803921568627451, |
| "eval_recall": 0.9803921568627451, |
| "eval_runtime": 16.9138, |
| "eval_samples_per_second": 60.306, |
| "eval_steps_per_second": 7.568, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.81, |
| "grad_norm": 0.016272857785224915, |
| "learning_rate": 0.00012383073496659243, |
| "loss": 0.003, |
| "step": 1710 |
| }, |
| { |
| "epoch": 3.83, |
| "grad_norm": 0.029770145192742348, |
| "learning_rate": 0.00012338530066815145, |
| "loss": 0.0039, |
| "step": 1720 |
| }, |
| { |
| "epoch": 3.85, |
| "grad_norm": 0.007188919931650162, |
| "learning_rate": 0.00012293986636971045, |
| "loss": 0.0073, |
| "step": 1730 |
| }, |
| { |
| "epoch": 3.88, |
| "grad_norm": 0.009417420253157616, |
| "learning_rate": 0.0001224944320712695, |
| "loss": 0.0034, |
| "step": 1740 |
| }, |
| { |
| "epoch": 3.9, |
| "grad_norm": 0.01681813970208168, |
| "learning_rate": 0.00012204899777282852, |
| "loss": 0.0106, |
| "step": 1750 |
| }, |
| { |
| "epoch": 3.92, |
| "grad_norm": 0.009343329817056656, |
| "learning_rate": 0.00012160356347438753, |
| "loss": 0.0027, |
| "step": 1760 |
| }, |
| { |
| "epoch": 3.94, |
| "grad_norm": 0.05181660130620003, |
| "learning_rate": 0.00012115812917594656, |
| "loss": 0.003, |
| "step": 1770 |
| }, |
| { |
| "epoch": 3.96, |
| "grad_norm": 0.012591933831572533, |
| "learning_rate": 0.00012071269487750559, |
| "loss": 0.0024, |
| "step": 1780 |
| }, |
| { |
| "epoch": 3.99, |
| "grad_norm": 0.011042669415473938, |
| "learning_rate": 0.00012026726057906458, |
| "loss": 0.002, |
| "step": 1790 |
| }, |
| { |
| "epoch": 4.01, |
| "grad_norm": 0.008775055408477783, |
| "learning_rate": 0.00011982182628062361, |
| "loss": 0.0021, |
| "step": 1800 |
| }, |
| { |
| "epoch": 4.01, |
| "eval_accuracy": 0.9794117647058823, |
| "eval_f1": 0.9794117647058823, |
| "eval_loss": 0.09472937136888504, |
| "eval_precision": 0.9794117647058823, |
| "eval_recall": 0.9794117647058823, |
| "eval_runtime": 17.0249, |
| "eval_samples_per_second": 59.912, |
| "eval_steps_per_second": 7.518, |
| "step": 1800 |
| }, |
| { |
| "epoch": 4.03, |
| "grad_norm": 0.012835958041250706, |
| "learning_rate": 0.00011937639198218265, |
| "loss": 0.002, |
| "step": 1810 |
| }, |
| { |
| "epoch": 4.05, |
| "grad_norm": 0.007154659368097782, |
| "learning_rate": 0.00011893095768374165, |
| "loss": 0.0019, |
| "step": 1820 |
| }, |
| { |
| "epoch": 4.08, |
| "grad_norm": 0.012052085250616074, |
| "learning_rate": 0.00011848552338530068, |
| "loss": 0.002, |
| "step": 1830 |
| }, |
| { |
| "epoch": 4.1, |
| "grad_norm": 0.009936490096151829, |
| "learning_rate": 0.00011804008908685969, |
| "loss": 0.0019, |
| "step": 1840 |
| }, |
| { |
| "epoch": 4.12, |
| "grad_norm": 0.005963869858533144, |
| "learning_rate": 0.00011759465478841872, |
| "loss": 0.0019, |
| "step": 1850 |
| }, |
| { |
| "epoch": 4.14, |
| "grad_norm": 0.007364674471318722, |
| "learning_rate": 0.00011714922048997774, |
| "loss": 0.0019, |
| "step": 1860 |
| }, |
| { |
| "epoch": 4.16, |
| "grad_norm": 0.006291561760008335, |
| "learning_rate": 0.00011670378619153674, |
| "loss": 0.0135, |
| "step": 1870 |
| }, |
| { |
| "epoch": 4.19, |
| "grad_norm": 0.08327926695346832, |
| "learning_rate": 0.00011625835189309577, |
| "loss": 0.0393, |
| "step": 1880 |
| }, |
| { |
| "epoch": 4.21, |
| "grad_norm": 0.005564951803535223, |
| "learning_rate": 0.0001158129175946548, |
| "loss": 0.0036, |
| "step": 1890 |
| }, |
| { |
| "epoch": 4.23, |
| "grad_norm": 0.2286999523639679, |
| "learning_rate": 0.00011536748329621381, |
| "loss": 0.0037, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.23, |
| "eval_accuracy": 0.9774509803921568, |
| "eval_f1": 0.9774509803921568, |
| "eval_loss": 0.09899948537349701, |
| "eval_precision": 0.9774509803921568, |
| "eval_recall": 0.9774509803921568, |
| "eval_runtime": 17.121, |
| "eval_samples_per_second": 59.576, |
| "eval_steps_per_second": 7.476, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.25, |
| "grad_norm": 0.007444659247994423, |
| "learning_rate": 0.00011492204899777283, |
| "loss": 0.0032, |
| "step": 1910 |
| }, |
| { |
| "epoch": 4.28, |
| "grad_norm": 0.01244131289422512, |
| "learning_rate": 0.00011447661469933186, |
| "loss": 0.006, |
| "step": 1920 |
| }, |
| { |
| "epoch": 4.3, |
| "grad_norm": 0.028236160054802895, |
| "learning_rate": 0.00011403118040089087, |
| "loss": 0.002, |
| "step": 1930 |
| }, |
| { |
| "epoch": 4.32, |
| "grad_norm": 0.0059605431742966175, |
| "learning_rate": 0.0001135857461024499, |
| "loss": 0.0566, |
| "step": 1940 |
| }, |
| { |
| "epoch": 4.34, |
| "grad_norm": 0.013918939977884293, |
| "learning_rate": 0.00011314031180400893, |
| "loss": 0.002, |
| "step": 1950 |
| }, |
| { |
| "epoch": 4.37, |
| "grad_norm": 0.17530293762683868, |
| "learning_rate": 0.00011269487750556793, |
| "loss": 0.0022, |
| "step": 1960 |
| }, |
| { |
| "epoch": 4.39, |
| "grad_norm": 0.005841858685016632, |
| "learning_rate": 0.00011224944320712695, |
| "loss": 0.0025, |
| "step": 1970 |
| }, |
| { |
| "epoch": 4.41, |
| "grad_norm": 0.004602132830768824, |
| "learning_rate": 0.00011180400890868597, |
| "loss": 0.0017, |
| "step": 1980 |
| }, |
| { |
| "epoch": 4.43, |
| "grad_norm": 0.005585751961916685, |
| "learning_rate": 0.00011135857461024499, |
| "loss": 0.0015, |
| "step": 1990 |
| }, |
| { |
| "epoch": 4.45, |
| "grad_norm": 0.005063850432634354, |
| "learning_rate": 0.00011091314031180402, |
| "loss": 0.0015, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.45, |
| "eval_accuracy": 0.9823529411764705, |
| "eval_f1": 0.9823529411764705, |
| "eval_loss": 0.07819082587957382, |
| "eval_precision": 0.9823529411764705, |
| "eval_recall": 0.9823529411764705, |
| "eval_runtime": 17.1846, |
| "eval_samples_per_second": 59.355, |
| "eval_steps_per_second": 7.449, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.48, |
| "grad_norm": 0.0102087976410985, |
| "learning_rate": 0.00011046770601336303, |
| "loss": 0.0017, |
| "step": 2010 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 0.006922448985278606, |
| "learning_rate": 0.00011002227171492206, |
| "loss": 0.0015, |
| "step": 2020 |
| }, |
| { |
| "epoch": 4.52, |
| "grad_norm": 0.003843717509880662, |
| "learning_rate": 0.00010957683741648108, |
| "loss": 0.0017, |
| "step": 2030 |
| }, |
| { |
| "epoch": 4.54, |
| "grad_norm": 0.03595568612217903, |
| "learning_rate": 0.00010913140311804008, |
| "loss": 0.002, |
| "step": 2040 |
| }, |
| { |
| "epoch": 4.57, |
| "grad_norm": 0.007531934417784214, |
| "learning_rate": 0.00010868596881959911, |
| "loss": 0.0016, |
| "step": 2050 |
| }, |
| { |
| "epoch": 4.59, |
| "grad_norm": 0.010021285153925419, |
| "learning_rate": 0.00010824053452115814, |
| "loss": 0.0017, |
| "step": 2060 |
| }, |
| { |
| "epoch": 4.61, |
| "grad_norm": 0.005502352025359869, |
| "learning_rate": 0.00010779510022271715, |
| "loss": 0.0025, |
| "step": 2070 |
| }, |
| { |
| "epoch": 4.63, |
| "grad_norm": 0.008162076584994793, |
| "learning_rate": 0.00010734966592427618, |
| "loss": 0.0016, |
| "step": 2080 |
| }, |
| { |
| "epoch": 4.65, |
| "grad_norm": 0.006500906310975552, |
| "learning_rate": 0.0001069042316258352, |
| "loss": 0.0051, |
| "step": 2090 |
| }, |
| { |
| "epoch": 4.68, |
| "grad_norm": 0.008429708890616894, |
| "learning_rate": 0.00010645879732739422, |
| "loss": 0.002, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.68, |
| "eval_accuracy": 0.9774509803921568, |
| "eval_f1": 0.9774509803921568, |
| "eval_loss": 0.0923672541975975, |
| "eval_precision": 0.9774509803921568, |
| "eval_recall": 0.9774509803921568, |
| "eval_runtime": 17.1715, |
| "eval_samples_per_second": 59.401, |
| "eval_steps_per_second": 7.454, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.7, |
| "grad_norm": 0.005085605662316084, |
| "learning_rate": 0.00010601336302895324, |
| "loss": 0.002, |
| "step": 2110 |
| }, |
| { |
| "epoch": 4.72, |
| "grad_norm": 0.0037075034342706203, |
| "learning_rate": 0.00010556792873051224, |
| "loss": 0.0014, |
| "step": 2120 |
| }, |
| { |
| "epoch": 4.74, |
| "grad_norm": 0.007616506423801184, |
| "learning_rate": 0.00010512249443207127, |
| "loss": 0.0018, |
| "step": 2130 |
| }, |
| { |
| "epoch": 4.77, |
| "grad_norm": 0.004252346232533455, |
| "learning_rate": 0.0001046770601336303, |
| "loss": 0.0015, |
| "step": 2140 |
| }, |
| { |
| "epoch": 4.79, |
| "grad_norm": 0.1608952432870865, |
| "learning_rate": 0.00010423162583518931, |
| "loss": 0.0019, |
| "step": 2150 |
| }, |
| { |
| "epoch": 4.81, |
| "grad_norm": 0.008307291194796562, |
| "learning_rate": 0.00010378619153674833, |
| "loss": 0.0013, |
| "step": 2160 |
| }, |
| { |
| "epoch": 4.83, |
| "grad_norm": 0.005408012308180332, |
| "learning_rate": 0.00010334075723830736, |
| "loss": 0.0015, |
| "step": 2170 |
| }, |
| { |
| "epoch": 4.86, |
| "grad_norm": 0.003423292888328433, |
| "learning_rate": 0.00010289532293986637, |
| "loss": 0.0013, |
| "step": 2180 |
| }, |
| { |
| "epoch": 4.88, |
| "grad_norm": 0.009325997903943062, |
| "learning_rate": 0.0001024498886414254, |
| "loss": 0.0015, |
| "step": 2190 |
| }, |
| { |
| "epoch": 4.9, |
| "grad_norm": 0.0038479180075228214, |
| "learning_rate": 0.00010200445434298443, |
| "loss": 0.0013, |
| "step": 2200 |
| }, |
| { |
| "epoch": 4.9, |
| "eval_accuracy": 0.9892156862745098, |
| "eval_f1": 0.9892156862745098, |
| "eval_loss": 0.06478295475244522, |
| "eval_precision": 0.9892156862745098, |
| "eval_recall": 0.9892156862745098, |
| "eval_runtime": 17.5643, |
| "eval_samples_per_second": 58.072, |
| "eval_steps_per_second": 7.288, |
| "step": 2200 |
| }, |
| { |
| "epoch": 4.92, |
| "grad_norm": 0.007743604481220245, |
| "learning_rate": 0.00010155902004454343, |
| "loss": 0.0017, |
| "step": 2210 |
| }, |
| { |
| "epoch": 4.94, |
| "grad_norm": 0.0047647771425545216, |
| "learning_rate": 0.00010111358574610245, |
| "loss": 0.0087, |
| "step": 2220 |
| }, |
| { |
| "epoch": 4.97, |
| "grad_norm": 0.00553127983585, |
| "learning_rate": 0.00010066815144766148, |
| "loss": 0.0013, |
| "step": 2230 |
| }, |
| { |
| "epoch": 4.99, |
| "grad_norm": 0.0051573943346738815, |
| "learning_rate": 0.00010022271714922049, |
| "loss": 0.0014, |
| "step": 2240 |
| }, |
| { |
| "epoch": 5.01, |
| "grad_norm": 0.007231057155877352, |
| "learning_rate": 9.977728285077952e-05, |
| "loss": 0.0014, |
| "step": 2250 |
| }, |
| { |
| "epoch": 5.03, |
| "grad_norm": 0.006223366595804691, |
| "learning_rate": 9.933184855233853e-05, |
| "loss": 0.0016, |
| "step": 2260 |
| }, |
| { |
| "epoch": 5.06, |
| "grad_norm": 0.007425224408507347, |
| "learning_rate": 9.888641425389756e-05, |
| "loss": 0.0214, |
| "step": 2270 |
| }, |
| { |
| "epoch": 5.08, |
| "grad_norm": 0.006520718801766634, |
| "learning_rate": 9.844097995545658e-05, |
| "loss": 0.0035, |
| "step": 2280 |
| }, |
| { |
| "epoch": 5.1, |
| "grad_norm": 0.009440802037715912, |
| "learning_rate": 9.79955456570156e-05, |
| "loss": 0.0016, |
| "step": 2290 |
| }, |
| { |
| "epoch": 5.12, |
| "grad_norm": 0.004528130404651165, |
| "learning_rate": 9.755011135857461e-05, |
| "loss": 0.0013, |
| "step": 2300 |
| }, |
| { |
| "epoch": 5.12, |
| "eval_accuracy": 0.984313725490196, |
| "eval_f1": 0.984313725490196, |
| "eval_loss": 0.07047847658395767, |
| "eval_precision": 0.984313725490196, |
| "eval_recall": 0.984313725490196, |
| "eval_runtime": 17.5441, |
| "eval_samples_per_second": 58.139, |
| "eval_steps_per_second": 7.296, |
| "step": 2300 |
| }, |
| { |
| "epoch": 5.14, |
| "grad_norm": 0.00686246482655406, |
| "learning_rate": 9.710467706013364e-05, |
| "loss": 0.0012, |
| "step": 2310 |
| }, |
| { |
| "epoch": 5.17, |
| "grad_norm": 0.010732870548963547, |
| "learning_rate": 9.665924276169266e-05, |
| "loss": 0.0015, |
| "step": 2320 |
| }, |
| { |
| "epoch": 5.19, |
| "grad_norm": 0.005895495880395174, |
| "learning_rate": 9.621380846325168e-05, |
| "loss": 0.0012, |
| "step": 2330 |
| }, |
| { |
| "epoch": 5.21, |
| "grad_norm": 0.002887641778215766, |
| "learning_rate": 9.576837416481069e-05, |
| "loss": 0.0012, |
| "step": 2340 |
| }, |
| { |
| "epoch": 5.23, |
| "grad_norm": 0.004080574493855238, |
| "learning_rate": 9.532293986636972e-05, |
| "loss": 0.0015, |
| "step": 2350 |
| }, |
| { |
| "epoch": 5.26, |
| "grad_norm": 0.004314645659178495, |
| "learning_rate": 9.487750556792874e-05, |
| "loss": 0.0013, |
| "step": 2360 |
| }, |
| { |
| "epoch": 5.28, |
| "grad_norm": 0.0024687820114195347, |
| "learning_rate": 9.443207126948775e-05, |
| "loss": 0.0012, |
| "step": 2370 |
| }, |
| { |
| "epoch": 5.3, |
| "grad_norm": 0.006993143353611231, |
| "learning_rate": 9.398663697104677e-05, |
| "loss": 0.0014, |
| "step": 2380 |
| }, |
| { |
| "epoch": 5.32, |
| "grad_norm": 0.0039545330218970776, |
| "learning_rate": 9.35412026726058e-05, |
| "loss": 0.001, |
| "step": 2390 |
| }, |
| { |
| "epoch": 5.35, |
| "grad_norm": 0.00476737879216671, |
| "learning_rate": 9.309576837416482e-05, |
| "loss": 0.0012, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.35, |
| "eval_accuracy": 0.9852941176470589, |
| "eval_f1": 0.9852941176470589, |
| "eval_loss": 0.06669602543115616, |
| "eval_precision": 0.9852941176470589, |
| "eval_recall": 0.9852941176470589, |
| "eval_runtime": 16.8478, |
| "eval_samples_per_second": 60.542, |
| "eval_steps_per_second": 7.597, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.37, |
| "grad_norm": 0.0059379832819104195, |
| "learning_rate": 9.265033407572383e-05, |
| "loss": 0.0013, |
| "step": 2410 |
| }, |
| { |
| "epoch": 5.39, |
| "grad_norm": 0.004163688980042934, |
| "learning_rate": 9.220489977728286e-05, |
| "loss": 0.0011, |
| "step": 2420 |
| }, |
| { |
| "epoch": 5.41, |
| "grad_norm": 0.005025350954383612, |
| "learning_rate": 9.175946547884187e-05, |
| "loss": 0.0011, |
| "step": 2430 |
| }, |
| { |
| "epoch": 5.43, |
| "grad_norm": 0.004696982447057962, |
| "learning_rate": 9.13140311804009e-05, |
| "loss": 0.0012, |
| "step": 2440 |
| }, |
| { |
| "epoch": 5.46, |
| "grad_norm": 0.006735695991665125, |
| "learning_rate": 9.086859688195991e-05, |
| "loss": 0.0012, |
| "step": 2450 |
| }, |
| { |
| "epoch": 5.48, |
| "grad_norm": 0.006193222012370825, |
| "learning_rate": 9.042316258351894e-05, |
| "loss": 0.0012, |
| "step": 2460 |
| }, |
| { |
| "epoch": 5.5, |
| "grad_norm": 0.00294502847827971, |
| "learning_rate": 8.997772828507795e-05, |
| "loss": 0.0012, |
| "step": 2470 |
| }, |
| { |
| "epoch": 5.52, |
| "grad_norm": 0.0051245009526610374, |
| "learning_rate": 8.953229398663698e-05, |
| "loss": 0.0012, |
| "step": 2480 |
| }, |
| { |
| "epoch": 5.55, |
| "grad_norm": 0.005629925988614559, |
| "learning_rate": 8.9086859688196e-05, |
| "loss": 0.0011, |
| "step": 2490 |
| }, |
| { |
| "epoch": 5.57, |
| "grad_norm": 0.0030437533278018236, |
| "learning_rate": 8.864142538975502e-05, |
| "loss": 0.0011, |
| "step": 2500 |
| }, |
| { |
| "epoch": 5.57, |
| "eval_accuracy": 0.9862745098039216, |
| "eval_f1": 0.9862745098039216, |
| "eval_loss": 0.06539511680603027, |
| "eval_precision": 0.9862745098039216, |
| "eval_recall": 0.9862745098039216, |
| "eval_runtime": 17.0201, |
| "eval_samples_per_second": 59.929, |
| "eval_steps_per_second": 7.521, |
| "step": 2500 |
| }, |
| { |
| "epoch": 5.59, |
| "grad_norm": 0.005343435797840357, |
| "learning_rate": 8.819599109131403e-05, |
| "loss": 0.001, |
| "step": 2510 |
| }, |
| { |
| "epoch": 5.61, |
| "grad_norm": 0.0075280191376805305, |
| "learning_rate": 8.775055679287306e-05, |
| "loss": 0.0011, |
| "step": 2520 |
| }, |
| { |
| "epoch": 5.63, |
| "grad_norm": 0.004148717503994703, |
| "learning_rate": 8.730512249443208e-05, |
| "loss": 0.001, |
| "step": 2530 |
| }, |
| { |
| "epoch": 5.66, |
| "grad_norm": 0.0036075827665627003, |
| "learning_rate": 8.68596881959911e-05, |
| "loss": 0.0012, |
| "step": 2540 |
| }, |
| { |
| "epoch": 5.68, |
| "grad_norm": 0.005052719730883837, |
| "learning_rate": 8.641425389755011e-05, |
| "loss": 0.0011, |
| "step": 2550 |
| }, |
| { |
| "epoch": 5.7, |
| "grad_norm": 0.0038795568980276585, |
| "learning_rate": 8.596881959910914e-05, |
| "loss": 0.0523, |
| "step": 2560 |
| }, |
| { |
| "epoch": 5.72, |
| "grad_norm": 0.004291652236133814, |
| "learning_rate": 8.552338530066816e-05, |
| "loss": 0.001, |
| "step": 2570 |
| }, |
| { |
| "epoch": 5.75, |
| "grad_norm": 0.004954340867698193, |
| "learning_rate": 8.507795100222718e-05, |
| "loss": 0.0011, |
| "step": 2580 |
| }, |
| { |
| "epoch": 5.77, |
| "grad_norm": 0.003370743477717042, |
| "learning_rate": 8.463251670378619e-05, |
| "loss": 0.001, |
| "step": 2590 |
| }, |
| { |
| "epoch": 5.79, |
| "grad_norm": 0.0034996343310922384, |
| "learning_rate": 8.418708240534521e-05, |
| "loss": 0.001, |
| "step": 2600 |
| }, |
| { |
| "epoch": 5.79, |
| "eval_accuracy": 0.9803921568627451, |
| "eval_f1": 0.9803921568627451, |
| "eval_loss": 0.08090393990278244, |
| "eval_precision": 0.9803921568627451, |
| "eval_recall": 0.9803921568627451, |
| "eval_runtime": 16.8205, |
| "eval_samples_per_second": 60.64, |
| "eval_steps_per_second": 7.61, |
| "step": 2600 |
| }, |
| { |
| "epoch": 5.81, |
| "grad_norm": 0.005843376740813255, |
| "learning_rate": 8.374164810690424e-05, |
| "loss": 0.0026, |
| "step": 2610 |
| }, |
| { |
| "epoch": 5.84, |
| "grad_norm": 0.0032442868687212467, |
| "learning_rate": 8.329621380846325e-05, |
| "loss": 0.001, |
| "step": 2620 |
| }, |
| { |
| "epoch": 5.86, |
| "grad_norm": 0.01854085363447666, |
| "learning_rate": 8.285077951002228e-05, |
| "loss": 0.0011, |
| "step": 2630 |
| }, |
| { |
| "epoch": 5.88, |
| "grad_norm": 0.008808494545519352, |
| "learning_rate": 8.24053452115813e-05, |
| "loss": 0.0011, |
| "step": 2640 |
| }, |
| { |
| "epoch": 5.9, |
| "grad_norm": 0.005633777007460594, |
| "learning_rate": 8.195991091314032e-05, |
| "loss": 0.0011, |
| "step": 2650 |
| }, |
| { |
| "epoch": 5.92, |
| "grad_norm": 0.0047475132159888744, |
| "learning_rate": 8.151447661469933e-05, |
| "loss": 0.0011, |
| "step": 2660 |
| }, |
| { |
| "epoch": 5.95, |
| "grad_norm": 0.0034259301610291004, |
| "learning_rate": 8.106904231625836e-05, |
| "loss": 0.001, |
| "step": 2670 |
| }, |
| { |
| "epoch": 5.97, |
| "grad_norm": 0.0043120612390339375, |
| "learning_rate": 8.062360801781737e-05, |
| "loss": 0.001, |
| "step": 2680 |
| }, |
| { |
| "epoch": 5.99, |
| "grad_norm": 0.0065597849898040295, |
| "learning_rate": 8.01781737193764e-05, |
| "loss": 0.0009, |
| "step": 2690 |
| }, |
| { |
| "epoch": 6.01, |
| "grad_norm": 0.003753486555069685, |
| "learning_rate": 7.973273942093543e-05, |
| "loss": 0.001, |
| "step": 2700 |
| }, |
| { |
| "epoch": 6.01, |
| "eval_accuracy": 0.9852941176470589, |
| "eval_f1": 0.9852941176470589, |
| "eval_loss": 0.060267385095357895, |
| "eval_precision": 0.9852941176470589, |
| "eval_recall": 0.9852941176470589, |
| "eval_runtime": 16.7755, |
| "eval_samples_per_second": 60.803, |
| "eval_steps_per_second": 7.63, |
| "step": 2700 |
| }, |
| { |
| "epoch": 6.04, |
| "grad_norm": 0.004091760143637657, |
| "learning_rate": 7.928730512249444e-05, |
| "loss": 0.001, |
| "step": 2710 |
| }, |
| { |
| "epoch": 6.06, |
| "grad_norm": 0.0037813796661794186, |
| "learning_rate": 7.884187082405345e-05, |
| "loss": 0.0009, |
| "step": 2720 |
| }, |
| { |
| "epoch": 6.08, |
| "grad_norm": 0.004726367071270943, |
| "learning_rate": 7.839643652561248e-05, |
| "loss": 0.001, |
| "step": 2730 |
| }, |
| { |
| "epoch": 6.1, |
| "grad_norm": 0.0028477911837399006, |
| "learning_rate": 7.79510022271715e-05, |
| "loss": 0.0009, |
| "step": 2740 |
| }, |
| { |
| "epoch": 6.12, |
| "grad_norm": 0.002803138457238674, |
| "learning_rate": 7.750556792873052e-05, |
| "loss": 0.0009, |
| "step": 2750 |
| }, |
| { |
| "epoch": 6.15, |
| "grad_norm": 0.0030345343984663486, |
| "learning_rate": 7.706013363028953e-05, |
| "loss": 0.001, |
| "step": 2760 |
| }, |
| { |
| "epoch": 6.17, |
| "grad_norm": 0.004826263524591923, |
| "learning_rate": 7.661469933184856e-05, |
| "loss": 0.001, |
| "step": 2770 |
| }, |
| { |
| "epoch": 6.19, |
| "grad_norm": 0.005404185503721237, |
| "learning_rate": 7.616926503340758e-05, |
| "loss": 0.0011, |
| "step": 2780 |
| }, |
| { |
| "epoch": 6.21, |
| "grad_norm": 0.006678530480712652, |
| "learning_rate": 7.57238307349666e-05, |
| "loss": 0.001, |
| "step": 2790 |
| }, |
| { |
| "epoch": 6.24, |
| "grad_norm": 0.003424995578825474, |
| "learning_rate": 7.527839643652561e-05, |
| "loss": 0.0009, |
| "step": 2800 |
| }, |
| { |
| "epoch": 6.24, |
| "eval_accuracy": 0.9852941176470589, |
| "eval_f1": 0.9852941176470589, |
| "eval_loss": 0.05866290256381035, |
| "eval_precision": 0.9852941176470589, |
| "eval_recall": 0.9852941176470589, |
| "eval_runtime": 16.7849, |
| "eval_samples_per_second": 60.769, |
| "eval_steps_per_second": 7.626, |
| "step": 2800 |
| }, |
| { |
| "epoch": 6.26, |
| "grad_norm": 0.005958515219390392, |
| "learning_rate": 7.483296213808464e-05, |
| "loss": 0.001, |
| "step": 2810 |
| }, |
| { |
| "epoch": 6.28, |
| "grad_norm": 0.005418773740530014, |
| "learning_rate": 7.438752783964366e-05, |
| "loss": 0.001, |
| "step": 2820 |
| }, |
| { |
| "epoch": 6.3, |
| "grad_norm": 0.004709057509899139, |
| "learning_rate": 7.394209354120267e-05, |
| "loss": 0.0009, |
| "step": 2830 |
| }, |
| { |
| "epoch": 6.33, |
| "grad_norm": 0.0029449143912643194, |
| "learning_rate": 7.34966592427617e-05, |
| "loss": 0.0254, |
| "step": 2840 |
| }, |
| { |
| "epoch": 6.35, |
| "grad_norm": 0.005954895168542862, |
| "learning_rate": 7.305122494432071e-05, |
| "loss": 0.001, |
| "step": 2850 |
| }, |
| { |
| "epoch": 6.37, |
| "grad_norm": 0.004912202246487141, |
| "learning_rate": 7.260579064587974e-05, |
| "loss": 0.001, |
| "step": 2860 |
| }, |
| { |
| "epoch": 6.39, |
| "grad_norm": 0.005968924146145582, |
| "learning_rate": 7.216035634743875e-05, |
| "loss": 0.0011, |
| "step": 2870 |
| }, |
| { |
| "epoch": 6.41, |
| "grad_norm": 0.006199703551828861, |
| "learning_rate": 7.171492204899778e-05, |
| "loss": 0.0009, |
| "step": 2880 |
| }, |
| { |
| "epoch": 6.44, |
| "grad_norm": 0.005734401289373636, |
| "learning_rate": 7.126948775055679e-05, |
| "loss": 0.001, |
| "step": 2890 |
| }, |
| { |
| "epoch": 6.46, |
| "grad_norm": 0.0027692036237567663, |
| "learning_rate": 7.082405345211582e-05, |
| "loss": 0.0009, |
| "step": 2900 |
| }, |
| { |
| "epoch": 6.46, |
| "eval_accuracy": 0.9862745098039216, |
| "eval_f1": 0.9862745098039216, |
| "eval_loss": 0.061346929520368576, |
| "eval_precision": 0.9862745098039216, |
| "eval_recall": 0.9862745098039216, |
| "eval_runtime": 16.7934, |
| "eval_samples_per_second": 60.738, |
| "eval_steps_per_second": 7.622, |
| "step": 2900 |
| }, |
| { |
| "epoch": 6.48, |
| "grad_norm": 0.006286944262683392, |
| "learning_rate": 7.037861915367485e-05, |
| "loss": 0.0008, |
| "step": 2910 |
| }, |
| { |
| "epoch": 6.5, |
| "grad_norm": 0.0025479758623987436, |
| "learning_rate": 6.993318485523386e-05, |
| "loss": 0.0009, |
| "step": 2920 |
| }, |
| { |
| "epoch": 6.53, |
| "grad_norm": 0.003326891688629985, |
| "learning_rate": 6.948775055679287e-05, |
| "loss": 0.001, |
| "step": 2930 |
| }, |
| { |
| "epoch": 6.55, |
| "grad_norm": 0.005029771476984024, |
| "learning_rate": 6.904231625835188e-05, |
| "loss": 0.0009, |
| "step": 2940 |
| }, |
| { |
| "epoch": 6.57, |
| "grad_norm": 0.00435183709487319, |
| "learning_rate": 6.859688195991092e-05, |
| "loss": 0.001, |
| "step": 2950 |
| }, |
| { |
| "epoch": 6.59, |
| "grad_norm": 0.005539336241781712, |
| "learning_rate": 6.815144766146994e-05, |
| "loss": 0.0009, |
| "step": 2960 |
| }, |
| { |
| "epoch": 6.61, |
| "grad_norm": 0.0029067988507449627, |
| "learning_rate": 6.770601336302895e-05, |
| "loss": 0.0008, |
| "step": 2970 |
| }, |
| { |
| "epoch": 6.64, |
| "grad_norm": 0.0038986029103398323, |
| "learning_rate": 6.726057906458798e-05, |
| "loss": 0.0008, |
| "step": 2980 |
| }, |
| { |
| "epoch": 6.66, |
| "grad_norm": 0.0038106811698526144, |
| "learning_rate": 6.6815144766147e-05, |
| "loss": 0.0008, |
| "step": 2990 |
| }, |
| { |
| "epoch": 6.68, |
| "grad_norm": 0.00427450193092227, |
| "learning_rate": 6.636971046770602e-05, |
| "loss": 0.0354, |
| "step": 3000 |
| }, |
| { |
| "epoch": 6.68, |
| "eval_accuracy": 0.9862745098039216, |
| "eval_f1": 0.9862745098039216, |
| "eval_loss": 0.06250577419996262, |
| "eval_precision": 0.9862745098039216, |
| "eval_recall": 0.9862745098039216, |
| "eval_runtime": 16.9581, |
| "eval_samples_per_second": 60.148, |
| "eval_steps_per_second": 7.548, |
| "step": 3000 |
| }, |
| { |
| "epoch": 6.7, |
| "grad_norm": 0.0054627833887934685, |
| "learning_rate": 6.592427616926503e-05, |
| "loss": 0.0009, |
| "step": 3010 |
| }, |
| { |
| "epoch": 6.73, |
| "grad_norm": 0.0041511994786560535, |
| "learning_rate": 6.547884187082406e-05, |
| "loss": 0.001, |
| "step": 3020 |
| }, |
| { |
| "epoch": 6.75, |
| "grad_norm": 0.0025098640471696854, |
| "learning_rate": 6.503340757238308e-05, |
| "loss": 0.0009, |
| "step": 3030 |
| }, |
| { |
| "epoch": 6.77, |
| "grad_norm": 0.09074518829584122, |
| "learning_rate": 6.45879732739421e-05, |
| "loss": 0.001, |
| "step": 3040 |
| }, |
| { |
| "epoch": 6.79, |
| "grad_norm": 0.0031906042713671923, |
| "learning_rate": 6.414253897550112e-05, |
| "loss": 0.0008, |
| "step": 3050 |
| }, |
| { |
| "epoch": 6.82, |
| "grad_norm": 0.006882569286972284, |
| "learning_rate": 6.369710467706013e-05, |
| "loss": 0.001, |
| "step": 3060 |
| }, |
| { |
| "epoch": 6.84, |
| "grad_norm": 0.18022125959396362, |
| "learning_rate": 6.325167037861916e-05, |
| "loss": 0.0013, |
| "step": 3070 |
| }, |
| { |
| "epoch": 6.86, |
| "grad_norm": 0.003266548039391637, |
| "learning_rate": 6.280623608017817e-05, |
| "loss": 0.0009, |
| "step": 3080 |
| }, |
| { |
| "epoch": 6.88, |
| "grad_norm": 0.0020031100139021873, |
| "learning_rate": 6.23608017817372e-05, |
| "loss": 0.0008, |
| "step": 3090 |
| }, |
| { |
| "epoch": 6.9, |
| "grad_norm": 0.004786783363670111, |
| "learning_rate": 6.191536748329621e-05, |
| "loss": 0.0009, |
| "step": 3100 |
| }, |
| { |
| "epoch": 6.9, |
| "eval_accuracy": 0.9862745098039216, |
| "eval_f1": 0.9862745098039216, |
| "eval_loss": 0.06396353989839554, |
| "eval_precision": 0.9862745098039216, |
| "eval_recall": 0.9862745098039216, |
| "eval_runtime": 16.7448, |
| "eval_samples_per_second": 60.914, |
| "eval_steps_per_second": 7.644, |
| "step": 3100 |
| }, |
| { |
| "epoch": 6.93, |
| "grad_norm": 0.003442308632656932, |
| "learning_rate": 6.146993318485523e-05, |
| "loss": 0.0007, |
| "step": 3110 |
| }, |
| { |
| "epoch": 6.95, |
| "grad_norm": 0.0047423760406672955, |
| "learning_rate": 6.102449888641426e-05, |
| "loss": 0.0008, |
| "step": 3120 |
| }, |
| { |
| "epoch": 6.97, |
| "grad_norm": 0.0036736875772476196, |
| "learning_rate": 6.057906458797328e-05, |
| "loss": 0.0008, |
| "step": 3130 |
| }, |
| { |
| "epoch": 6.99, |
| "grad_norm": 0.003000143449753523, |
| "learning_rate": 6.013363028953229e-05, |
| "loss": 0.0008, |
| "step": 3140 |
| }, |
| { |
| "epoch": 7.02, |
| "grad_norm": 0.003053349442780018, |
| "learning_rate": 5.9688195991091325e-05, |
| "loss": 0.0012, |
| "step": 3150 |
| }, |
| { |
| "epoch": 7.04, |
| "grad_norm": 0.003400023328140378, |
| "learning_rate": 5.924276169265034e-05, |
| "loss": 0.0008, |
| "step": 3160 |
| }, |
| { |
| "epoch": 7.06, |
| "grad_norm": 0.0021733990870416164, |
| "learning_rate": 5.879732739420936e-05, |
| "loss": 0.0008, |
| "step": 3170 |
| }, |
| { |
| "epoch": 7.08, |
| "grad_norm": 0.0028391126543283463, |
| "learning_rate": 5.835189309576837e-05, |
| "loss": 0.0007, |
| "step": 3180 |
| }, |
| { |
| "epoch": 7.1, |
| "grad_norm": 0.0022689776960760355, |
| "learning_rate": 5.79064587973274e-05, |
| "loss": 0.0008, |
| "step": 3190 |
| }, |
| { |
| "epoch": 7.13, |
| "grad_norm": 0.00372182740829885, |
| "learning_rate": 5.746102449888642e-05, |
| "loss": 0.0009, |
| "step": 3200 |
| }, |
| { |
| "epoch": 7.13, |
| "eval_accuracy": 0.9862745098039216, |
| "eval_f1": 0.9862745098039216, |
| "eval_loss": 0.06368714570999146, |
| "eval_precision": 0.9862745098039216, |
| "eval_recall": 0.9862745098039216, |
| "eval_runtime": 17.2827, |
| "eval_samples_per_second": 59.018, |
| "eval_steps_per_second": 7.406, |
| "step": 3200 |
| }, |
| { |
| "epoch": 7.15, |
| "grad_norm": 0.004794612992554903, |
| "learning_rate": 5.701559020044544e-05, |
| "loss": 0.0007, |
| "step": 3210 |
| }, |
| { |
| "epoch": 7.17, |
| "grad_norm": 0.00307331676594913, |
| "learning_rate": 5.6570155902004463e-05, |
| "loss": 0.0008, |
| "step": 3220 |
| }, |
| { |
| "epoch": 7.19, |
| "grad_norm": 0.0037229089066386223, |
| "learning_rate": 5.6124721603563476e-05, |
| "loss": 0.0007, |
| "step": 3230 |
| }, |
| { |
| "epoch": 7.22, |
| "grad_norm": 0.0043710386380553246, |
| "learning_rate": 5.5679287305122496e-05, |
| "loss": 0.0068, |
| "step": 3240 |
| }, |
| { |
| "epoch": 7.24, |
| "grad_norm": 0.0028628443833440542, |
| "learning_rate": 5.5233853006681516e-05, |
| "loss": 0.0007, |
| "step": 3250 |
| }, |
| { |
| "epoch": 7.26, |
| "grad_norm": 0.002583292778581381, |
| "learning_rate": 5.478841870824054e-05, |
| "loss": 0.0008, |
| "step": 3260 |
| }, |
| { |
| "epoch": 7.28, |
| "grad_norm": 0.002930576680228114, |
| "learning_rate": 5.4342984409799555e-05, |
| "loss": 0.0008, |
| "step": 3270 |
| }, |
| { |
| "epoch": 7.31, |
| "grad_norm": 0.0036957694683223963, |
| "learning_rate": 5.3897550111358575e-05, |
| "loss": 0.0008, |
| "step": 3280 |
| }, |
| { |
| "epoch": 7.33, |
| "grad_norm": 0.0022817254066467285, |
| "learning_rate": 5.34521158129176e-05, |
| "loss": 0.0008, |
| "step": 3290 |
| }, |
| { |
| "epoch": 7.35, |
| "grad_norm": 0.004223665222525597, |
| "learning_rate": 5.300668151447662e-05, |
| "loss": 0.0008, |
| "step": 3300 |
| }, |
| { |
| "epoch": 7.35, |
| "eval_accuracy": 0.9862745098039216, |
| "eval_f1": 0.9862745098039216, |
| "eval_loss": 0.06401467323303223, |
| "eval_precision": 0.9862745098039216, |
| "eval_recall": 0.9862745098039216, |
| "eval_runtime": 16.8904, |
| "eval_samples_per_second": 60.389, |
| "eval_steps_per_second": 7.578, |
| "step": 3300 |
| }, |
| { |
| "epoch": 7.37, |
| "grad_norm": 0.0030589376110583544, |
| "learning_rate": 5.2561247216035634e-05, |
| "loss": 0.0008, |
| "step": 3310 |
| }, |
| { |
| "epoch": 7.39, |
| "grad_norm": 0.0033100908622145653, |
| "learning_rate": 5.2115812917594654e-05, |
| "loss": 0.0007, |
| "step": 3320 |
| }, |
| { |
| "epoch": 7.42, |
| "grad_norm": 0.0021207653917372227, |
| "learning_rate": 5.167037861915368e-05, |
| "loss": 0.0007, |
| "step": 3330 |
| }, |
| { |
| "epoch": 7.44, |
| "grad_norm": 0.004427778068929911, |
| "learning_rate": 5.12249443207127e-05, |
| "loss": 0.0008, |
| "step": 3340 |
| }, |
| { |
| "epoch": 7.46, |
| "grad_norm": 0.002078037243336439, |
| "learning_rate": 5.077951002227171e-05, |
| "loss": 0.0006, |
| "step": 3350 |
| }, |
| { |
| "epoch": 7.48, |
| "grad_norm": 0.005685769021511078, |
| "learning_rate": 5.033407572383074e-05, |
| "loss": 0.0008, |
| "step": 3360 |
| }, |
| { |
| "epoch": 7.51, |
| "grad_norm": 0.0038398895412683487, |
| "learning_rate": 4.988864142538976e-05, |
| "loss": 0.0006, |
| "step": 3370 |
| }, |
| { |
| "epoch": 7.53, |
| "grad_norm": 0.0034891765099018812, |
| "learning_rate": 4.944320712694878e-05, |
| "loss": 0.0007, |
| "step": 3380 |
| }, |
| { |
| "epoch": 7.55, |
| "grad_norm": 0.0021167476661503315, |
| "learning_rate": 4.89977728285078e-05, |
| "loss": 0.0008, |
| "step": 3390 |
| }, |
| { |
| "epoch": 7.57, |
| "grad_norm": 0.003661705646663904, |
| "learning_rate": 4.855233853006682e-05, |
| "loss": 0.0007, |
| "step": 3400 |
| }, |
| { |
| "epoch": 7.57, |
| "eval_accuracy": 0.9862745098039216, |
| "eval_f1": 0.9862745098039216, |
| "eval_loss": 0.06384364515542984, |
| "eval_precision": 0.9862745098039216, |
| "eval_recall": 0.9862745098039216, |
| "eval_runtime": 16.9734, |
| "eval_samples_per_second": 60.094, |
| "eval_steps_per_second": 7.541, |
| "step": 3400 |
| }, |
| { |
| "epoch": 7.59, |
| "grad_norm": 0.0037693388294428587, |
| "learning_rate": 4.810690423162584e-05, |
| "loss": 0.0008, |
| "step": 3410 |
| }, |
| { |
| "epoch": 7.62, |
| "grad_norm": 0.003122014459222555, |
| "learning_rate": 4.766146993318486e-05, |
| "loss": 0.0006, |
| "step": 3420 |
| }, |
| { |
| "epoch": 7.64, |
| "grad_norm": 0.0021702463272958994, |
| "learning_rate": 4.721603563474388e-05, |
| "loss": 0.0008, |
| "step": 3430 |
| }, |
| { |
| "epoch": 7.66, |
| "grad_norm": 0.0018209881382063031, |
| "learning_rate": 4.67706013363029e-05, |
| "loss": 0.0007, |
| "step": 3440 |
| }, |
| { |
| "epoch": 7.68, |
| "grad_norm": 0.002424059435725212, |
| "learning_rate": 4.632516703786192e-05, |
| "loss": 0.0007, |
| "step": 3450 |
| }, |
| { |
| "epoch": 7.71, |
| "grad_norm": 0.0038027861155569553, |
| "learning_rate": 4.5879732739420936e-05, |
| "loss": 0.0007, |
| "step": 3460 |
| }, |
| { |
| "epoch": 7.73, |
| "grad_norm": 0.0030559494625777006, |
| "learning_rate": 4.5434298440979956e-05, |
| "loss": 0.0008, |
| "step": 3470 |
| }, |
| { |
| "epoch": 7.75, |
| "grad_norm": 0.0016161584062501788, |
| "learning_rate": 4.4988864142538976e-05, |
| "loss": 0.0007, |
| "step": 3480 |
| }, |
| { |
| "epoch": 7.77, |
| "grad_norm": 1.0589083433151245, |
| "learning_rate": 4.4543429844098e-05, |
| "loss": 0.0271, |
| "step": 3490 |
| }, |
| { |
| "epoch": 7.8, |
| "grad_norm": 0.002775995759293437, |
| "learning_rate": 4.4097995545657015e-05, |
| "loss": 0.0008, |
| "step": 3500 |
| }, |
| { |
| "epoch": 7.8, |
| "eval_accuracy": 0.9862745098039216, |
| "eval_f1": 0.9862745098039216, |
| "eval_loss": 0.06323155015707016, |
| "eval_precision": 0.9862745098039216, |
| "eval_recall": 0.9862745098039216, |
| "eval_runtime": 17.0863, |
| "eval_samples_per_second": 59.697, |
| "eval_steps_per_second": 7.491, |
| "step": 3500 |
| }, |
| { |
| "epoch": 7.82, |
| "grad_norm": 0.0030816958751529455, |
| "learning_rate": 4.365256124721604e-05, |
| "loss": 0.0007, |
| "step": 3510 |
| }, |
| { |
| "epoch": 7.84, |
| "grad_norm": 0.002927512163296342, |
| "learning_rate": 4.3207126948775055e-05, |
| "loss": 0.0007, |
| "step": 3520 |
| }, |
| { |
| "epoch": 7.86, |
| "grad_norm": 0.0032380030024796724, |
| "learning_rate": 4.276169265033408e-05, |
| "loss": 0.0007, |
| "step": 3530 |
| }, |
| { |
| "epoch": 7.88, |
| "grad_norm": 0.004015618935227394, |
| "learning_rate": 4.2316258351893094e-05, |
| "loss": 0.0006, |
| "step": 3540 |
| }, |
| { |
| "epoch": 7.91, |
| "grad_norm": 0.0043089911341667175, |
| "learning_rate": 4.187082405345212e-05, |
| "loss": 0.0008, |
| "step": 3550 |
| }, |
| { |
| "epoch": 7.93, |
| "grad_norm": 0.0034070268739014864, |
| "learning_rate": 4.142538975501114e-05, |
| "loss": 0.0007, |
| "step": 3560 |
| }, |
| { |
| "epoch": 7.95, |
| "grad_norm": 0.002833964768797159, |
| "learning_rate": 4.097995545657016e-05, |
| "loss": 0.0007, |
| "step": 3570 |
| }, |
| { |
| "epoch": 7.97, |
| "grad_norm": 0.002836576197296381, |
| "learning_rate": 4.053452115812918e-05, |
| "loss": 0.0007, |
| "step": 3580 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.0018820447148755193, |
| "learning_rate": 4.00890868596882e-05, |
| "loss": 0.0006, |
| "step": 3590 |
| }, |
| { |
| "epoch": 8.02, |
| "grad_norm": 0.002963653299957514, |
| "learning_rate": 3.964365256124722e-05, |
| "loss": 0.0006, |
| "step": 3600 |
| }, |
| { |
| "epoch": 8.02, |
| "eval_accuracy": 0.9862745098039216, |
| "eval_f1": 0.9862745098039216, |
| "eval_loss": 0.06277049332857132, |
| "eval_precision": 0.9862745098039216, |
| "eval_recall": 0.9862745098039216, |
| "eval_runtime": 16.8786, |
| "eval_samples_per_second": 60.432, |
| "eval_steps_per_second": 7.584, |
| "step": 3600 |
| }, |
| { |
| "epoch": 8.04, |
| "grad_norm": 0.0024287349078804255, |
| "learning_rate": 3.919821826280624e-05, |
| "loss": 0.0006, |
| "step": 3610 |
| }, |
| { |
| "epoch": 8.06, |
| "grad_norm": 0.0035882722586393356, |
| "learning_rate": 3.875278396436526e-05, |
| "loss": 0.0007, |
| "step": 3620 |
| }, |
| { |
| "epoch": 8.08, |
| "grad_norm": 0.3669194281101227, |
| "learning_rate": 3.830734966592428e-05, |
| "loss": 0.0038, |
| "step": 3630 |
| }, |
| { |
| "epoch": 8.11, |
| "grad_norm": 0.0030698508489876986, |
| "learning_rate": 3.78619153674833e-05, |
| "loss": 0.0007, |
| "step": 3640 |
| }, |
| { |
| "epoch": 8.13, |
| "grad_norm": 0.0035393694415688515, |
| "learning_rate": 3.741648106904232e-05, |
| "loss": 0.0007, |
| "step": 3650 |
| }, |
| { |
| "epoch": 8.15, |
| "grad_norm": 0.0043913149274885654, |
| "learning_rate": 3.697104677060134e-05, |
| "loss": 0.0007, |
| "step": 3660 |
| }, |
| { |
| "epoch": 8.17, |
| "grad_norm": 0.003033293876796961, |
| "learning_rate": 3.652561247216036e-05, |
| "loss": 0.0008, |
| "step": 3670 |
| }, |
| { |
| "epoch": 8.2, |
| "grad_norm": 0.002264636103063822, |
| "learning_rate": 3.608017817371938e-05, |
| "loss": 0.0006, |
| "step": 3680 |
| }, |
| { |
| "epoch": 8.22, |
| "grad_norm": 0.0028679061215370893, |
| "learning_rate": 3.5634743875278396e-05, |
| "loss": 0.0007, |
| "step": 3690 |
| }, |
| { |
| "epoch": 8.24, |
| "grad_norm": 0.00327710947021842, |
| "learning_rate": 3.518930957683742e-05, |
| "loss": 0.0006, |
| "step": 3700 |
| }, |
| { |
| "epoch": 8.24, |
| "eval_accuracy": 0.9862745098039216, |
| "eval_f1": 0.9862745098039216, |
| "eval_loss": 0.06276960670948029, |
| "eval_precision": 0.9862745098039216, |
| "eval_recall": 0.9862745098039216, |
| "eval_runtime": 16.7716, |
| "eval_samples_per_second": 60.817, |
| "eval_steps_per_second": 7.632, |
| "step": 3700 |
| }, |
| { |
| "epoch": 8.26, |
| "grad_norm": 0.0032630411442369223, |
| "learning_rate": 3.4743875278396436e-05, |
| "loss": 0.0007, |
| "step": 3710 |
| }, |
| { |
| "epoch": 8.29, |
| "grad_norm": 0.004381363280117512, |
| "learning_rate": 3.429844097995546e-05, |
| "loss": 0.0006, |
| "step": 3720 |
| }, |
| { |
| "epoch": 8.31, |
| "grad_norm": 0.002072981558740139, |
| "learning_rate": 3.3853006681514475e-05, |
| "loss": 0.0006, |
| "step": 3730 |
| }, |
| { |
| "epoch": 8.33, |
| "grad_norm": 0.004002034664154053, |
| "learning_rate": 3.34075723830735e-05, |
| "loss": 0.0007, |
| "step": 3740 |
| }, |
| { |
| "epoch": 8.35, |
| "grad_norm": 0.002828976372256875, |
| "learning_rate": 3.2962138084632515e-05, |
| "loss": 0.0007, |
| "step": 3750 |
| }, |
| { |
| "epoch": 8.37, |
| "grad_norm": 0.002102539176121354, |
| "learning_rate": 3.251670378619154e-05, |
| "loss": 0.0006, |
| "step": 3760 |
| }, |
| { |
| "epoch": 8.4, |
| "grad_norm": 0.0019427842926234007, |
| "learning_rate": 3.207126948775056e-05, |
| "loss": 0.0006, |
| "step": 3770 |
| }, |
| { |
| "epoch": 8.42, |
| "grad_norm": 0.0024794205091893673, |
| "learning_rate": 3.162583518930958e-05, |
| "loss": 0.0006, |
| "step": 3780 |
| }, |
| { |
| "epoch": 8.44, |
| "grad_norm": 0.003910183906555176, |
| "learning_rate": 3.11804008908686e-05, |
| "loss": 0.0007, |
| "step": 3790 |
| }, |
| { |
| "epoch": 8.46, |
| "grad_norm": 0.0024412323255091906, |
| "learning_rate": 3.073496659242761e-05, |
| "loss": 0.0007, |
| "step": 3800 |
| }, |
| { |
| "epoch": 8.46, |
| "eval_accuracy": 0.9862745098039216, |
| "eval_f1": 0.9862745098039216, |
| "eval_loss": 0.06286178529262543, |
| "eval_precision": 0.9862745098039216, |
| "eval_recall": 0.9862745098039216, |
| "eval_runtime": 17.0003, |
| "eval_samples_per_second": 59.999, |
| "eval_steps_per_second": 7.529, |
| "step": 3800 |
| }, |
| { |
| "epoch": 8.49, |
| "grad_norm": 0.002674848074093461, |
| "learning_rate": 3.028953229398664e-05, |
| "loss": 0.0006, |
| "step": 3810 |
| }, |
| { |
| "epoch": 8.51, |
| "grad_norm": 0.0029363699723035097, |
| "learning_rate": 2.9844097995545663e-05, |
| "loss": 0.0007, |
| "step": 3820 |
| }, |
| { |
| "epoch": 8.53, |
| "grad_norm": 0.0016088394913822412, |
| "learning_rate": 2.939866369710468e-05, |
| "loss": 0.0005, |
| "step": 3830 |
| }, |
| { |
| "epoch": 8.55, |
| "grad_norm": 0.0027649877592921257, |
| "learning_rate": 2.89532293986637e-05, |
| "loss": 0.0006, |
| "step": 3840 |
| }, |
| { |
| "epoch": 8.57, |
| "grad_norm": 0.0022558195050805807, |
| "learning_rate": 2.850779510022272e-05, |
| "loss": 0.0006, |
| "step": 3850 |
| }, |
| { |
| "epoch": 8.6, |
| "grad_norm": 0.0031748777255415916, |
| "learning_rate": 2.8062360801781738e-05, |
| "loss": 0.0007, |
| "step": 3860 |
| }, |
| { |
| "epoch": 8.62, |
| "grad_norm": 0.003781731706112623, |
| "learning_rate": 2.7616926503340758e-05, |
| "loss": 0.0007, |
| "step": 3870 |
| }, |
| { |
| "epoch": 8.64, |
| "grad_norm": 0.0021370695903897285, |
| "learning_rate": 2.7171492204899778e-05, |
| "loss": 0.0006, |
| "step": 3880 |
| }, |
| { |
| "epoch": 8.66, |
| "grad_norm": 0.0020453929901123047, |
| "learning_rate": 2.67260579064588e-05, |
| "loss": 0.0006, |
| "step": 3890 |
| }, |
| { |
| "epoch": 8.69, |
| "grad_norm": 0.0017306358786299825, |
| "learning_rate": 2.6280623608017817e-05, |
| "loss": 0.0006, |
| "step": 3900 |
| }, |
| { |
| "epoch": 8.69, |
| "eval_accuracy": 0.9862745098039216, |
| "eval_f1": 0.9862745098039216, |
| "eval_loss": 0.06279093772172928, |
| "eval_precision": 0.9862745098039216, |
| "eval_recall": 0.9862745098039216, |
| "eval_runtime": 16.9565, |
| "eval_samples_per_second": 60.154, |
| "eval_steps_per_second": 7.549, |
| "step": 3900 |
| }, |
| { |
| "epoch": 8.71, |
| "grad_norm": 0.0038726001512259245, |
| "learning_rate": 2.583518930957684e-05, |
| "loss": 0.0149, |
| "step": 3910 |
| }, |
| { |
| "epoch": 8.73, |
| "grad_norm": 0.002596154110506177, |
| "learning_rate": 2.5389755011135856e-05, |
| "loss": 0.0006, |
| "step": 3920 |
| }, |
| { |
| "epoch": 8.75, |
| "grad_norm": 0.0026898845098912716, |
| "learning_rate": 2.494432071269488e-05, |
| "loss": 0.0007, |
| "step": 3930 |
| }, |
| { |
| "epoch": 8.78, |
| "grad_norm": 0.0032163651194423437, |
| "learning_rate": 2.44988864142539e-05, |
| "loss": 0.0006, |
| "step": 3940 |
| }, |
| { |
| "epoch": 8.8, |
| "grad_norm": 0.0022637268994003534, |
| "learning_rate": 2.405345211581292e-05, |
| "loss": 0.0006, |
| "step": 3950 |
| }, |
| { |
| "epoch": 8.82, |
| "grad_norm": 0.0022868013475090265, |
| "learning_rate": 2.360801781737194e-05, |
| "loss": 0.0006, |
| "step": 3960 |
| }, |
| { |
| "epoch": 8.84, |
| "grad_norm": 0.002997064031660557, |
| "learning_rate": 2.316258351893096e-05, |
| "loss": 0.0006, |
| "step": 3970 |
| }, |
| { |
| "epoch": 8.86, |
| "grad_norm": 0.0025922399945557117, |
| "learning_rate": 2.2717149220489978e-05, |
| "loss": 0.0006, |
| "step": 3980 |
| }, |
| { |
| "epoch": 8.89, |
| "grad_norm": 0.0017196180997416377, |
| "learning_rate": 2.2271714922049e-05, |
| "loss": 0.0006, |
| "step": 3990 |
| }, |
| { |
| "epoch": 8.91, |
| "grad_norm": 0.002627770882099867, |
| "learning_rate": 2.182628062360802e-05, |
| "loss": 0.0007, |
| "step": 4000 |
| }, |
| { |
| "epoch": 8.91, |
| "eval_accuracy": 0.9862745098039216, |
| "eval_f1": 0.9862745098039216, |
| "eval_loss": 0.06286938488483429, |
| "eval_precision": 0.9862745098039216, |
| "eval_recall": 0.9862745098039216, |
| "eval_runtime": 16.93, |
| "eval_samples_per_second": 60.248, |
| "eval_steps_per_second": 7.561, |
| "step": 4000 |
| }, |
| { |
| "epoch": 8.93, |
| "grad_norm": 0.0023368040565401316, |
| "learning_rate": 2.138084632516704e-05, |
| "loss": 0.0005, |
| "step": 4010 |
| }, |
| { |
| "epoch": 8.95, |
| "grad_norm": 0.0016646678559482098, |
| "learning_rate": 2.093541202672606e-05, |
| "loss": 0.0006, |
| "step": 4020 |
| }, |
| { |
| "epoch": 8.98, |
| "grad_norm": 0.0031739012338221073, |
| "learning_rate": 2.048997772828508e-05, |
| "loss": 0.0006, |
| "step": 4030 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.001964448019862175, |
| "learning_rate": 2.00445434298441e-05, |
| "loss": 0.0007, |
| "step": 4040 |
| }, |
| { |
| "epoch": 9.02, |
| "grad_norm": 0.002518624998629093, |
| "learning_rate": 1.959910913140312e-05, |
| "loss": 0.0006, |
| "step": 4050 |
| }, |
| { |
| "epoch": 9.04, |
| "grad_norm": 0.002754157641902566, |
| "learning_rate": 1.915367483296214e-05, |
| "loss": 0.0006, |
| "step": 4060 |
| }, |
| { |
| "epoch": 9.06, |
| "grad_norm": 0.0033753858879208565, |
| "learning_rate": 1.870824053452116e-05, |
| "loss": 0.0006, |
| "step": 4070 |
| }, |
| { |
| "epoch": 9.09, |
| "grad_norm": 0.42151451110839844, |
| "learning_rate": 1.826280623608018e-05, |
| "loss": 0.0042, |
| "step": 4080 |
| }, |
| { |
| "epoch": 9.11, |
| "grad_norm": 0.002747293096035719, |
| "learning_rate": 1.7817371937639198e-05, |
| "loss": 0.0006, |
| "step": 4090 |
| }, |
| { |
| "epoch": 9.13, |
| "grad_norm": 0.0017670992529019713, |
| "learning_rate": 1.7371937639198218e-05, |
| "loss": 0.0006, |
| "step": 4100 |
| }, |
| { |
| "epoch": 9.13, |
| "eval_accuracy": 0.9862745098039216, |
| "eval_f1": 0.9862745098039216, |
| "eval_loss": 0.0628853365778923, |
| "eval_precision": 0.9862745098039216, |
| "eval_recall": 0.9862745098039216, |
| "eval_runtime": 17.0193, |
| "eval_samples_per_second": 59.932, |
| "eval_steps_per_second": 7.521, |
| "step": 4100 |
| }, |
| { |
| "epoch": 9.15, |
| "grad_norm": 0.004704649560153484, |
| "learning_rate": 1.6926503340757238e-05, |
| "loss": 0.0006, |
| "step": 4110 |
| }, |
| { |
| "epoch": 9.18, |
| "grad_norm": 0.0014927310403436422, |
| "learning_rate": 1.6481069042316257e-05, |
| "loss": 0.0006, |
| "step": 4120 |
| }, |
| { |
| "epoch": 9.2, |
| "grad_norm": 0.002436956623569131, |
| "learning_rate": 1.603563474387528e-05, |
| "loss": 0.0006, |
| "step": 4130 |
| }, |
| { |
| "epoch": 9.22, |
| "grad_norm": 0.0027117161080241203, |
| "learning_rate": 1.55902004454343e-05, |
| "loss": 0.0006, |
| "step": 4140 |
| }, |
| { |
| "epoch": 9.24, |
| "grad_norm": 0.0022878183517605066, |
| "learning_rate": 1.514476614699332e-05, |
| "loss": 0.0006, |
| "step": 4150 |
| }, |
| { |
| "epoch": 9.27, |
| "grad_norm": 0.0029811938293278217, |
| "learning_rate": 1.469933184855234e-05, |
| "loss": 0.0006, |
| "step": 4160 |
| }, |
| { |
| "epoch": 9.29, |
| "grad_norm": 0.003581145778298378, |
| "learning_rate": 1.425389755011136e-05, |
| "loss": 0.0006, |
| "step": 4170 |
| }, |
| { |
| "epoch": 9.31, |
| "grad_norm": 0.004033802077174187, |
| "learning_rate": 1.3808463251670379e-05, |
| "loss": 0.0006, |
| "step": 4180 |
| }, |
| { |
| "epoch": 9.33, |
| "grad_norm": 0.002811565762385726, |
| "learning_rate": 1.33630289532294e-05, |
| "loss": 0.0006, |
| "step": 4190 |
| }, |
| { |
| "epoch": 9.35, |
| "grad_norm": 0.002183671807870269, |
| "learning_rate": 1.291759465478842e-05, |
| "loss": 0.0005, |
| "step": 4200 |
| }, |
| { |
| "epoch": 9.35, |
| "eval_accuracy": 0.9862745098039216, |
| "eval_f1": 0.9862745098039216, |
| "eval_loss": 0.0628632977604866, |
| "eval_precision": 0.9862745098039216, |
| "eval_recall": 0.9862745098039216, |
| "eval_runtime": 17.1478, |
| "eval_samples_per_second": 59.483, |
| "eval_steps_per_second": 7.465, |
| "step": 4200 |
| }, |
| { |
| "epoch": 9.38, |
| "grad_norm": 0.002663408173248172, |
| "learning_rate": 1.247216035634744e-05, |
| "loss": 0.0005, |
| "step": 4210 |
| }, |
| { |
| "epoch": 9.4, |
| "grad_norm": 0.0021959999576210976, |
| "learning_rate": 1.202672605790646e-05, |
| "loss": 0.0006, |
| "step": 4220 |
| }, |
| { |
| "epoch": 9.42, |
| "grad_norm": 0.0024719948414713144, |
| "learning_rate": 1.158129175946548e-05, |
| "loss": 0.0007, |
| "step": 4230 |
| }, |
| { |
| "epoch": 9.44, |
| "grad_norm": 0.002360550919547677, |
| "learning_rate": 1.11358574610245e-05, |
| "loss": 0.0006, |
| "step": 4240 |
| }, |
| { |
| "epoch": 9.47, |
| "grad_norm": 0.002426006831228733, |
| "learning_rate": 1.069042316258352e-05, |
| "loss": 0.0006, |
| "step": 4250 |
| }, |
| { |
| "epoch": 9.49, |
| "grad_norm": 0.002104952232912183, |
| "learning_rate": 1.024498886414254e-05, |
| "loss": 0.0006, |
| "step": 4260 |
| }, |
| { |
| "epoch": 9.51, |
| "grad_norm": 0.002701717661693692, |
| "learning_rate": 9.79955456570156e-06, |
| "loss": 0.0006, |
| "step": 4270 |
| }, |
| { |
| "epoch": 9.53, |
| "grad_norm": 0.0030042980797588825, |
| "learning_rate": 9.35412026726058e-06, |
| "loss": 0.0006, |
| "step": 4280 |
| }, |
| { |
| "epoch": 9.55, |
| "grad_norm": 0.0021413913927972317, |
| "learning_rate": 8.908685968819599e-06, |
| "loss": 0.0006, |
| "step": 4290 |
| }, |
| { |
| "epoch": 9.58, |
| "grad_norm": 0.0034586272668093443, |
| "learning_rate": 8.463251670378619e-06, |
| "loss": 0.0006, |
| "step": 4300 |
| }, |
| { |
| "epoch": 9.58, |
| "eval_accuracy": 0.9862745098039216, |
| "eval_f1": 0.9862745098039216, |
| "eval_loss": 0.0628676787018776, |
| "eval_precision": 0.9862745098039216, |
| "eval_recall": 0.9862745098039216, |
| "eval_runtime": 17.1566, |
| "eval_samples_per_second": 59.453, |
| "eval_steps_per_second": 7.461, |
| "step": 4300 |
| }, |
| { |
| "epoch": 9.6, |
| "grad_norm": 0.002973487600684166, |
| "learning_rate": 8.01781737193764e-06, |
| "loss": 0.0006, |
| "step": 4310 |
| }, |
| { |
| "epoch": 9.62, |
| "grad_norm": 0.00266460282728076, |
| "learning_rate": 7.57238307349666e-06, |
| "loss": 0.0005, |
| "step": 4320 |
| }, |
| { |
| "epoch": 9.64, |
| "grad_norm": 0.0035296238493174314, |
| "learning_rate": 7.12694877505568e-06, |
| "loss": 0.0006, |
| "step": 4330 |
| }, |
| { |
| "epoch": 9.67, |
| "grad_norm": 0.0026424438692629337, |
| "learning_rate": 6.6815144766147e-06, |
| "loss": 0.0006, |
| "step": 4340 |
| }, |
| { |
| "epoch": 9.69, |
| "grad_norm": 0.003232095157727599, |
| "learning_rate": 6.23608017817372e-06, |
| "loss": 0.0006, |
| "step": 4350 |
| }, |
| { |
| "epoch": 9.71, |
| "grad_norm": 0.00274029653519392, |
| "learning_rate": 5.79064587973274e-06, |
| "loss": 0.0006, |
| "step": 4360 |
| }, |
| { |
| "epoch": 9.73, |
| "grad_norm": 0.001976421568542719, |
| "learning_rate": 5.34521158129176e-06, |
| "loss": 0.0005, |
| "step": 4370 |
| }, |
| { |
| "epoch": 9.76, |
| "grad_norm": 0.0021137825679033995, |
| "learning_rate": 4.89977728285078e-06, |
| "loss": 0.0006, |
| "step": 4380 |
| }, |
| { |
| "epoch": 9.78, |
| "grad_norm": 0.003050548257306218, |
| "learning_rate": 4.4543429844097995e-06, |
| "loss": 0.0005, |
| "step": 4390 |
| }, |
| { |
| "epoch": 9.8, |
| "grad_norm": 0.0019041696796193719, |
| "learning_rate": 4.00890868596882e-06, |
| "loss": 0.0006, |
| "step": 4400 |
| }, |
| { |
| "epoch": 9.8, |
| "eval_accuracy": 0.9862745098039216, |
| "eval_f1": 0.9862745098039216, |
| "eval_loss": 0.06287658959627151, |
| "eval_precision": 0.9862745098039216, |
| "eval_recall": 0.9862745098039216, |
| "eval_runtime": 16.9237, |
| "eval_samples_per_second": 60.27, |
| "eval_steps_per_second": 7.563, |
| "step": 4400 |
| }, |
| { |
| "epoch": 9.82, |
| "grad_norm": 0.003077227622270584, |
| "learning_rate": 3.56347438752784e-06, |
| "loss": 0.0006, |
| "step": 4410 |
| }, |
| { |
| "epoch": 9.84, |
| "grad_norm": 0.0018154334975406528, |
| "learning_rate": 3.11804008908686e-06, |
| "loss": 0.0005, |
| "step": 4420 |
| }, |
| { |
| "epoch": 9.87, |
| "grad_norm": 0.0031176796182990074, |
| "learning_rate": 2.67260579064588e-06, |
| "loss": 0.0006, |
| "step": 4430 |
| }, |
| { |
| "epoch": 9.89, |
| "grad_norm": 0.0013539218343794346, |
| "learning_rate": 2.2271714922048998e-06, |
| "loss": 0.0006, |
| "step": 4440 |
| }, |
| { |
| "epoch": 9.91, |
| "grad_norm": 0.0016152034513652325, |
| "learning_rate": 1.78173719376392e-06, |
| "loss": 0.0006, |
| "step": 4450 |
| }, |
| { |
| "epoch": 9.93, |
| "grad_norm": 0.0019145664991810918, |
| "learning_rate": 1.33630289532294e-06, |
| "loss": 0.0006, |
| "step": 4460 |
| }, |
| { |
| "epoch": 9.96, |
| "grad_norm": 0.0021777807269245386, |
| "learning_rate": 8.9086859688196e-07, |
| "loss": 0.0006, |
| "step": 4470 |
| }, |
| { |
| "epoch": 9.98, |
| "grad_norm": 0.002236017258837819, |
| "learning_rate": 4.4543429844098e-07, |
| "loss": 0.0004, |
| "step": 4480 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.004857390653342009, |
| "learning_rate": 0.0, |
| "loss": 0.0101, |
| "step": 4490 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 4490, |
| "total_flos": 5.562769847811564e+18, |
| "train_loss": 0.11195627215104738, |
| "train_runtime": 3861.9509, |
| "train_samples_per_second": 18.563, |
| "train_steps_per_second": 1.163 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 4490, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 100, |
| "total_flos": 5.562769847811564e+18, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|