| { | |
| "best_metric": 0.15021921694278717, | |
| "best_model_checkpoint": "ViT_Flower102_2/checkpoint-1600", | |
| "epoch": 10.0, | |
| "eval_steps": 100, | |
| "global_step": 4490, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.0026773272547870874, | |
| "learning_rate": 0.00019955456570155904, | |
| "loss": 0.0009, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.005304301157593727, | |
| "learning_rate": 0.00019910913140311804, | |
| "loss": 0.0009, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 20.306926727294922, | |
| "learning_rate": 0.00019866369710467706, | |
| "loss": 0.0194, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 2.321718692779541, | |
| "learning_rate": 0.0001982182628062361, | |
| "loss": 0.1544, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 0.021045241504907608, | |
| "learning_rate": 0.00019777282850779511, | |
| "loss": 0.0701, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 0.02391223795711994, | |
| "learning_rate": 0.00019732739420935414, | |
| "loss": 0.1303, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.20434625446796417, | |
| "learning_rate": 0.00019688195991091317, | |
| "loss": 0.1413, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 0.2549870014190674, | |
| "learning_rate": 0.00019643652561247217, | |
| "loss": 0.1047, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 6.997387886047363, | |
| "learning_rate": 0.0001959910913140312, | |
| "loss": 0.1234, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 5.206876277923584, | |
| "learning_rate": 0.0001955456570155902, | |
| "loss": 0.053, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_accuracy": 0.9235294117647059, | |
| "eval_f1": 0.9235294117647059, | |
| "eval_loss": 0.3198450803756714, | |
| "eval_precision": 0.9235294117647059, | |
| "eval_recall": 0.9235294117647059, | |
| "eval_runtime": 17.2015, | |
| "eval_samples_per_second": 59.297, | |
| "eval_steps_per_second": 7.441, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 6.388282299041748, | |
| "learning_rate": 0.00019510022271714922, | |
| "loss": 0.1377, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 5.129618167877197, | |
| "learning_rate": 0.00019465478841870825, | |
| "loss": 0.0561, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 8.198286056518555, | |
| "learning_rate": 0.00019420935412026727, | |
| "loss": 0.0417, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 7.592113494873047, | |
| "learning_rate": 0.0001937639198218263, | |
| "loss": 0.0388, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 2.7412798404693604, | |
| "learning_rate": 0.00019331848552338533, | |
| "loss": 0.0921, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 0.12602120637893677, | |
| "learning_rate": 0.00019287305122494432, | |
| "loss": 0.1905, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 5.073046684265137, | |
| "learning_rate": 0.00019242761692650335, | |
| "loss": 0.1331, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.5400639176368713, | |
| "learning_rate": 0.00019198218262806238, | |
| "loss": 0.0315, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 11.878341674804688, | |
| "learning_rate": 0.00019153674832962138, | |
| "loss": 0.2794, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 0.48547011613845825, | |
| "learning_rate": 0.0001910913140311804, | |
| "loss": 0.1225, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_accuracy": 0.9166666666666666, | |
| "eval_f1": 0.9166666666666666, | |
| "eval_loss": 0.40865278244018555, | |
| "eval_precision": 0.9166666666666666, | |
| "eval_recall": 0.9166666666666666, | |
| "eval_runtime": 17.0881, | |
| "eval_samples_per_second": 59.691, | |
| "eval_steps_per_second": 7.491, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 0.015034608542919159, | |
| "learning_rate": 0.00019064587973273943, | |
| "loss": 0.0953, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 1.665817141532898, | |
| "learning_rate": 0.00019020044543429846, | |
| "loss": 0.081, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 1.737998127937317, | |
| "learning_rate": 0.00018975501113585748, | |
| "loss": 0.037, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 0.08088938146829605, | |
| "learning_rate": 0.00018930957683741648, | |
| "loss": 0.2113, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 6.148426055908203, | |
| "learning_rate": 0.0001888641425389755, | |
| "loss": 0.0674, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 0.07585727423429489, | |
| "learning_rate": 0.00018841870824053454, | |
| "loss": 0.1698, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 4.297791481018066, | |
| "learning_rate": 0.00018797327394209353, | |
| "loss": 0.1087, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 0.039582639932632446, | |
| "learning_rate": 0.00018752783964365256, | |
| "loss": 0.082, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 4.647770881652832, | |
| "learning_rate": 0.0001870824053452116, | |
| "loss": 0.0256, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 0.05055054649710655, | |
| "learning_rate": 0.00018663697104677061, | |
| "loss": 0.1985, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_accuracy": 0.9568627450980393, | |
| "eval_f1": 0.9568627450980393, | |
| "eval_loss": 0.20681221783161163, | |
| "eval_precision": 0.9568627450980393, | |
| "eval_recall": 0.9568627450980393, | |
| "eval_runtime": 17.1131, | |
| "eval_samples_per_second": 59.603, | |
| "eval_steps_per_second": 7.48, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 12.765353202819824, | |
| "learning_rate": 0.00018619153674832964, | |
| "loss": 0.0698, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 9.829974174499512, | |
| "learning_rate": 0.00018574610244988867, | |
| "loss": 0.076, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 0.024047628045082092, | |
| "learning_rate": 0.00018530066815144767, | |
| "loss": 0.0335, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 6.1861066818237305, | |
| "learning_rate": 0.0001848552338530067, | |
| "loss": 0.1085, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.6630871295928955, | |
| "learning_rate": 0.00018440979955456572, | |
| "loss": 0.1537, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.012870335020124912, | |
| "learning_rate": 0.00018396436525612472, | |
| "loss": 0.0729, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 0.9247069954872131, | |
| "learning_rate": 0.00018351893095768375, | |
| "loss": 0.0293, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 0.0049523478373885155, | |
| "learning_rate": 0.00018307349665924277, | |
| "loss": 0.043, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 6.6150336265563965, | |
| "learning_rate": 0.0001826280623608018, | |
| "loss": 0.0184, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 0.16694368422031403, | |
| "learning_rate": 0.00018218262806236082, | |
| "loss": 0.0804, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_accuracy": 0.9333333333333333, | |
| "eval_f1": 0.9333333333333333, | |
| "eval_loss": 0.3181270360946655, | |
| "eval_precision": 0.9333333333333333, | |
| "eval_recall": 0.9333333333333333, | |
| "eval_runtime": 16.8894, | |
| "eval_samples_per_second": 60.393, | |
| "eval_steps_per_second": 7.579, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 2.3894243240356445, | |
| "learning_rate": 0.00018173719376391982, | |
| "loss": 0.0611, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 0.014911720529198647, | |
| "learning_rate": 0.00018129175946547885, | |
| "loss": 0.0172, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.01411391980946064, | |
| "learning_rate": 0.00018084632516703788, | |
| "loss": 0.2164, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 13.27253532409668, | |
| "learning_rate": 0.00018040089086859688, | |
| "loss": 0.1459, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.028676768764853477, | |
| "learning_rate": 0.0001799554565701559, | |
| "loss": 0.0702, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 4.484796047210693, | |
| "learning_rate": 0.00017951002227171493, | |
| "loss": 0.1319, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 5.17644739151001, | |
| "learning_rate": 0.00017906458797327396, | |
| "loss": 0.1276, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 4.405980110168457, | |
| "learning_rate": 0.00017861915367483298, | |
| "loss": 0.1718, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 0.1352098286151886, | |
| "learning_rate": 0.000178173719376392, | |
| "loss": 0.1796, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 8.556909561157227, | |
| "learning_rate": 0.000177728285077951, | |
| "loss": 0.1672, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_accuracy": 0.9274509803921569, | |
| "eval_f1": 0.9274509803921569, | |
| "eval_loss": 0.35819730162620544, | |
| "eval_precision": 0.9274509803921569, | |
| "eval_recall": 0.9274509803921569, | |
| "eval_runtime": 16.8086, | |
| "eval_samples_per_second": 60.683, | |
| "eval_steps_per_second": 7.615, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 0.4172447919845581, | |
| "learning_rate": 0.00017728285077951003, | |
| "loss": 0.0686, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 3.2105464935302734, | |
| "learning_rate": 0.00017683741648106903, | |
| "loss": 0.0347, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 0.23049691319465637, | |
| "learning_rate": 0.00017639198218262806, | |
| "loss": 0.1714, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.008479436859488487, | |
| "learning_rate": 0.0001759465478841871, | |
| "loss": 0.0519, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 0.11945914477109909, | |
| "learning_rate": 0.00017550111358574611, | |
| "loss": 0.0858, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 1.7262502908706665, | |
| "learning_rate": 0.00017505567928730514, | |
| "loss": 0.0753, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 0.02038051374256611, | |
| "learning_rate": 0.00017461024498886417, | |
| "loss": 0.0571, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 9.348058700561523, | |
| "learning_rate": 0.00017416481069042317, | |
| "loss": 0.128, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 0.039238784462213516, | |
| "learning_rate": 0.0001737193763919822, | |
| "loss": 0.0551, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 5.897979736328125, | |
| "learning_rate": 0.00017327394209354122, | |
| "loss": 0.1287, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "eval_accuracy": 0.9450980392156862, | |
| "eval_f1": 0.9450980392156862, | |
| "eval_loss": 0.27001550793647766, | |
| "eval_precision": 0.9450980392156862, | |
| "eval_recall": 0.9450980392156862, | |
| "eval_runtime": 17.21, | |
| "eval_samples_per_second": 59.268, | |
| "eval_steps_per_second": 7.438, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 5.809938907623291, | |
| "learning_rate": 0.00017282850779510022, | |
| "loss": 0.0894, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 0.013608383946120739, | |
| "learning_rate": 0.00017238307349665924, | |
| "loss": 0.0312, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.14918509125709534, | |
| "learning_rate": 0.00017193763919821827, | |
| "loss": 0.0813, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 0.09317727386951447, | |
| "learning_rate": 0.0001714922048997773, | |
| "loss": 0.1021, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 0.12424630671739578, | |
| "learning_rate": 0.00017104677060133632, | |
| "loss": 0.0382, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 0.004964092746376991, | |
| "learning_rate": 0.00017060133630289532, | |
| "loss": 0.0729, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 3.553861379623413, | |
| "learning_rate": 0.00017015590200445435, | |
| "loss": 0.0475, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 0.0814567431807518, | |
| "learning_rate": 0.00016971046770601338, | |
| "loss": 0.0424, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 0.5184776186943054, | |
| "learning_rate": 0.00016926503340757238, | |
| "loss": 0.0182, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 0.0049703894183039665, | |
| "learning_rate": 0.0001688195991091314, | |
| "loss": 0.0147, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "eval_accuracy": 0.9205882352941176, | |
| "eval_f1": 0.9205882352941176, | |
| "eval_loss": 0.369125634431839, | |
| "eval_precision": 0.9205882352941176, | |
| "eval_recall": 0.9205882352941176, | |
| "eval_runtime": 16.9323, | |
| "eval_samples_per_second": 60.24, | |
| "eval_steps_per_second": 7.56, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 0.021510396152734756, | |
| "learning_rate": 0.00016837416481069043, | |
| "loss": 0.0821, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.06858960539102554, | |
| "learning_rate": 0.00016792873051224946, | |
| "loss": 0.0053, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "grad_norm": 0.00354547961615026, | |
| "learning_rate": 0.00016748329621380848, | |
| "loss": 0.027, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 0.0027330678422003984, | |
| "learning_rate": 0.0001670378619153675, | |
| "loss": 0.0038, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 0.0024623360950499773, | |
| "learning_rate": 0.0001665924276169265, | |
| "loss": 0.0435, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 0.006295201368629932, | |
| "learning_rate": 0.00016614699331848553, | |
| "loss": 0.063, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 0.8971105813980103, | |
| "learning_rate": 0.00016570155902004456, | |
| "loss": 0.0625, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "grad_norm": 0.004554128274321556, | |
| "learning_rate": 0.00016525612472160356, | |
| "loss": 0.004, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 1.1439096927642822, | |
| "learning_rate": 0.0001648106904231626, | |
| "loss": 0.0959, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 0.12275100499391556, | |
| "learning_rate": 0.0001643652561247216, | |
| "loss": 0.0416, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "eval_accuracy": 0.9470588235294117, | |
| "eval_f1": 0.9470588235294117, | |
| "eval_loss": 0.25350436568260193, | |
| "eval_precision": 0.9470588235294117, | |
| "eval_recall": 0.9470588235294117, | |
| "eval_runtime": 16.7158, | |
| "eval_samples_per_second": 61.02, | |
| "eval_steps_per_second": 7.657, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.0064841569401323795, | |
| "learning_rate": 0.00016391982182628064, | |
| "loss": 0.0479, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 0.006001554429531097, | |
| "learning_rate": 0.00016347438752783967, | |
| "loss": 0.0624, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 0.1525709182024002, | |
| "learning_rate": 0.00016302895322939867, | |
| "loss": 0.0855, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "grad_norm": 0.017199277877807617, | |
| "learning_rate": 0.0001625835189309577, | |
| "loss": 0.0199, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "grad_norm": 0.40461423993110657, | |
| "learning_rate": 0.00016213808463251672, | |
| "loss": 0.0613, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 0.006222237832844257, | |
| "learning_rate": 0.00016169265033407572, | |
| "loss": 0.0168, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 0.0056493207812309265, | |
| "learning_rate": 0.00016124721603563474, | |
| "loss": 0.0247, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 0.004911198280751705, | |
| "learning_rate": 0.00016080178173719377, | |
| "loss": 0.074, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 0.0023081935942173004, | |
| "learning_rate": 0.0001603563474387528, | |
| "loss": 0.1029, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.010029925964772701, | |
| "learning_rate": 0.00015991091314031182, | |
| "loss": 0.0211, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9470588235294117, | |
| "eval_f1": 0.9470588235294117, | |
| "eval_loss": 0.25747954845428467, | |
| "eval_precision": 0.9470588235294117, | |
| "eval_recall": 0.9470588235294117, | |
| "eval_runtime": 16.8113, | |
| "eval_samples_per_second": 60.674, | |
| "eval_steps_per_second": 7.614, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 0.003727799979969859, | |
| "learning_rate": 0.00015946547884187085, | |
| "loss": 0.0126, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "grad_norm": 0.7938788533210754, | |
| "learning_rate": 0.00015902004454342985, | |
| "loss": 0.0702, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 0.48882561922073364, | |
| "learning_rate": 0.00015857461024498888, | |
| "loss": 0.0513, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "grad_norm": 10.071187973022461, | |
| "learning_rate": 0.0001581291759465479, | |
| "loss": 0.109, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 0.009675947949290276, | |
| "learning_rate": 0.0001576837416481069, | |
| "loss": 0.0034, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "grad_norm": 0.004574100486934185, | |
| "learning_rate": 0.00015723830734966593, | |
| "loss": 0.0018, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 0.0033382533583790064, | |
| "learning_rate": 0.00015679287305122495, | |
| "loss": 0.0719, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 4.464893341064453, | |
| "learning_rate": 0.00015634743875278398, | |
| "loss": 0.0099, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 0.004051242955029011, | |
| "learning_rate": 0.000155902004454343, | |
| "loss": 0.0846, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 0.047729793936014175, | |
| "learning_rate": 0.000155456570155902, | |
| "loss": 0.088, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "eval_accuracy": 0.9529411764705882, | |
| "eval_f1": 0.9529411764705882, | |
| "eval_loss": 0.19075074791908264, | |
| "eval_precision": 0.9529411764705882, | |
| "eval_recall": 0.9529411764705882, | |
| "eval_runtime": 16.8471, | |
| "eval_samples_per_second": 60.544, | |
| "eval_steps_per_second": 7.598, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 0.008470825850963593, | |
| "learning_rate": 0.00015501113585746103, | |
| "loss": 0.0022, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 2.529259204864502, | |
| "learning_rate": 0.00015456570155902006, | |
| "loss": 0.0917, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 0.3583894670009613, | |
| "learning_rate": 0.00015412026726057906, | |
| "loss": 0.0793, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 0.039421286433935165, | |
| "learning_rate": 0.00015367483296213809, | |
| "loss": 0.0742, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "grad_norm": 0.2551974654197693, | |
| "learning_rate": 0.0001532293986636971, | |
| "loss": 0.0029, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 0.3370533883571625, | |
| "learning_rate": 0.00015278396436525614, | |
| "loss": 0.0563, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 1.642697811126709, | |
| "learning_rate": 0.00015233853006681517, | |
| "loss": 0.0362, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "grad_norm": 12.125362396240234, | |
| "learning_rate": 0.0001518930957683742, | |
| "loss": 0.0949, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "grad_norm": 0.0022143302485346794, | |
| "learning_rate": 0.0001514476614699332, | |
| "loss": 0.0219, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 0.0026613217778503895, | |
| "learning_rate": 0.00015100222717149222, | |
| "loss": 0.1849, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "eval_accuracy": 0.9529411764705882, | |
| "eval_f1": 0.9529411764705882, | |
| "eval_loss": 0.2200697511434555, | |
| "eval_precision": 0.9529411764705882, | |
| "eval_recall": 0.9529411764705882, | |
| "eval_runtime": 16.9063, | |
| "eval_samples_per_second": 60.332, | |
| "eval_steps_per_second": 7.571, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "grad_norm": 0.5232903957366943, | |
| "learning_rate": 0.00015055679287305122, | |
| "loss": 0.0013, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "grad_norm": 1.5729717016220093, | |
| "learning_rate": 0.00015011135857461024, | |
| "loss": 0.0433, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 0.005640827585011721, | |
| "learning_rate": 0.00014966592427616927, | |
| "loss": 0.0007, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 7.901826858520508, | |
| "learning_rate": 0.0001492204899777283, | |
| "loss": 0.1131, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 0.4438769221305847, | |
| "learning_rate": 0.00014877505567928732, | |
| "loss": 0.0557, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 0.0032986474689096212, | |
| "learning_rate": 0.00014832962138084635, | |
| "loss": 0.001, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 0.09250050783157349, | |
| "learning_rate": 0.00014788418708240535, | |
| "loss": 0.02, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 0.005981163587421179, | |
| "learning_rate": 0.00014743875278396438, | |
| "loss": 0.0147, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 0.02270863950252533, | |
| "learning_rate": 0.0001469933184855234, | |
| "loss": 0.0246, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 0.014892498031258583, | |
| "learning_rate": 0.0001465478841870824, | |
| "loss": 0.0009, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "eval_accuracy": 0.9549019607843138, | |
| "eval_f1": 0.9549019607843138, | |
| "eval_loss": 0.22289611399173737, | |
| "eval_precision": 0.9549019607843138, | |
| "eval_recall": 0.9549019607843138, | |
| "eval_runtime": 16.9051, | |
| "eval_samples_per_second": 60.337, | |
| "eval_steps_per_second": 7.572, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 10.428900718688965, | |
| "learning_rate": 0.00014610244988864143, | |
| "loss": 0.0115, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 0.0370117723941803, | |
| "learning_rate": 0.00014565701559020045, | |
| "loss": 0.0525, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 0.0029497628565877676, | |
| "learning_rate": 0.00014521158129175948, | |
| "loss": 0.0009, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 0.8202322125434875, | |
| "learning_rate": 0.0001447661469933185, | |
| "loss": 0.051, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 0.012352533638477325, | |
| "learning_rate": 0.0001443207126948775, | |
| "loss": 0.0034, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 0.5958288908004761, | |
| "learning_rate": 0.00014387527839643653, | |
| "loss": 0.0016, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "grad_norm": 0.01864822395145893, | |
| "learning_rate": 0.00014342984409799556, | |
| "loss": 0.0113, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 0.053810037672519684, | |
| "learning_rate": 0.00014298440979955456, | |
| "loss": 0.0044, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 0.07594209164381027, | |
| "learning_rate": 0.00014253897550111359, | |
| "loss": 0.0022, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 0.003192998468875885, | |
| "learning_rate": 0.0001420935412026726, | |
| "loss": 0.0599, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "eval_accuracy": 0.9607843137254902, | |
| "eval_f1": 0.9607843137254902, | |
| "eval_loss": 0.1780730038881302, | |
| "eval_precision": 0.9607843137254902, | |
| "eval_recall": 0.9607843137254902, | |
| "eval_runtime": 16.8581, | |
| "eval_samples_per_second": 60.505, | |
| "eval_steps_per_second": 7.593, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 0.0019150603329762816, | |
| "learning_rate": 0.00014164810690423164, | |
| "loss": 0.0136, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 0.0017021102830767632, | |
| "learning_rate": 0.00014120267260579067, | |
| "loss": 0.0004, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 0.002855106256902218, | |
| "learning_rate": 0.0001407572383073497, | |
| "loss": 0.0028, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "grad_norm": 0.0011817626655101776, | |
| "learning_rate": 0.0001403118040089087, | |
| "loss": 0.0146, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "grad_norm": 11.652885437011719, | |
| "learning_rate": 0.00013986636971046772, | |
| "loss": 0.0155, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "grad_norm": 0.002146989107131958, | |
| "learning_rate": 0.00013942093541202674, | |
| "loss": 0.007, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "grad_norm": 0.0012873058440163732, | |
| "learning_rate": 0.00013897550111358574, | |
| "loss": 0.0005, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "grad_norm": 0.001976664876565337, | |
| "learning_rate": 0.00013853006681514477, | |
| "loss": 0.0067, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "grad_norm": 0.004611461888998747, | |
| "learning_rate": 0.00013808463251670377, | |
| "loss": 0.068, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "grad_norm": 0.05266120657324791, | |
| "learning_rate": 0.00013763919821826282, | |
| "loss": 0.0004, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "eval_accuracy": 0.9666666666666667, | |
| "eval_f1": 0.9666666666666667, | |
| "eval_loss": 0.1750936210155487, | |
| "eval_precision": 0.9666666666666667, | |
| "eval_recall": 0.9666666666666667, | |
| "eval_runtime": 16.8283, | |
| "eval_samples_per_second": 60.612, | |
| "eval_steps_per_second": 7.606, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "grad_norm": 0.005359290167689323, | |
| "learning_rate": 0.00013719376391982185, | |
| "loss": 0.038, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "grad_norm": 0.0019557911437004805, | |
| "learning_rate": 0.00013674832962138085, | |
| "loss": 0.0006, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "grad_norm": 0.002684570848941803, | |
| "learning_rate": 0.00013630289532293988, | |
| "loss": 0.0179, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "grad_norm": 2.459765911102295, | |
| "learning_rate": 0.0001358574610244989, | |
| "loss": 0.0164, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "grad_norm": 0.010600595735013485, | |
| "learning_rate": 0.0001354120267260579, | |
| "loss": 0.0008, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "grad_norm": 0.12665680050849915, | |
| "learning_rate": 0.00013496659242761693, | |
| "loss": 0.1161, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "grad_norm": 0.0013257049722597003, | |
| "learning_rate": 0.00013452115812917595, | |
| "loss": 0.0718, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "grad_norm": 2.888887882232666, | |
| "learning_rate": 0.00013407572383073498, | |
| "loss": 0.0028, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "grad_norm": 0.0022232867777347565, | |
| "learning_rate": 0.000133630289532294, | |
| "loss": 0.0009, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "grad_norm": 0.0026971769984811544, | |
| "learning_rate": 0.00013318485523385303, | |
| "loss": 0.0004, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "eval_accuracy": 0.9686274509803922, | |
| "eval_f1": 0.9686274509803922, | |
| "eval_loss": 0.1684277504682541, | |
| "eval_precision": 0.9686274509803922, | |
| "eval_recall": 0.9686274509803922, | |
| "eval_runtime": 16.8277, | |
| "eval_samples_per_second": 60.614, | |
| "eval_steps_per_second": 7.606, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "grad_norm": 0.06871479004621506, | |
| "learning_rate": 0.00013273942093541203, | |
| "loss": 0.0004, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "grad_norm": 0.010680126026272774, | |
| "learning_rate": 0.00013229398663697106, | |
| "loss": 0.0623, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "grad_norm": 0.0024642229545861483, | |
| "learning_rate": 0.00013184855233853006, | |
| "loss": 0.0005, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "grad_norm": 0.0014489213936030865, | |
| "learning_rate": 0.00013140311804008909, | |
| "loss": 0.0004, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "grad_norm": 0.004346741829067469, | |
| "learning_rate": 0.0001309576837416481, | |
| "loss": 0.0025, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "grad_norm": 0.006028163246810436, | |
| "learning_rate": 0.0001305122494432071, | |
| "loss": 0.0546, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "grad_norm": 0.0034053712151944637, | |
| "learning_rate": 0.00013006681514476616, | |
| "loss": 0.028, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "grad_norm": 0.004035326186567545, | |
| "learning_rate": 0.0001296213808463252, | |
| "loss": 0.0042, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "grad_norm": 0.0025597705971449614, | |
| "learning_rate": 0.0001291759465478842, | |
| "loss": 0.0201, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "grad_norm": 0.0010411799885332584, | |
| "learning_rate": 0.00012873051224944322, | |
| "loss": 0.0352, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "eval_accuracy": 0.9754901960784313, | |
| "eval_f1": 0.9754901960784313, | |
| "eval_loss": 0.15021921694278717, | |
| "eval_precision": 0.9754901960784313, | |
| "eval_recall": 0.9754901960784313, | |
| "eval_runtime": 16.7579, | |
| "eval_samples_per_second": 60.867, | |
| "eval_steps_per_second": 7.638, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "grad_norm": 0.04698515310883522, | |
| "learning_rate": 0.00012828507795100224, | |
| "loss": 0.0036, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "grad_norm": 0.013074472546577454, | |
| "learning_rate": 0.00012783964365256124, | |
| "loss": 0.0626, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "grad_norm": 0.002302026841789484, | |
| "learning_rate": 0.00012739420935412027, | |
| "loss": 0.0004, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "grad_norm": 0.012451753951609135, | |
| "learning_rate": 0.0001269487750556793, | |
| "loss": 0.0418, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "grad_norm": 0.010159431956708431, | |
| "learning_rate": 0.00012650334075723832, | |
| "loss": 0.0008, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "grad_norm": 0.20833130180835724, | |
| "learning_rate": 0.00012605790645879735, | |
| "loss": 0.0006, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "grad_norm": 0.0008345023961737752, | |
| "learning_rate": 0.00012561247216035635, | |
| "loss": 0.0013, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "grad_norm": 0.0027376762591302395, | |
| "learning_rate": 0.00012516703786191537, | |
| "loss": 0.0003, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "grad_norm": 0.0012037215055897832, | |
| "learning_rate": 0.0001247216035634744, | |
| "loss": 0.0003, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "grad_norm": 0.02287732996046543, | |
| "learning_rate": 0.0001242761692650334, | |
| "loss": 0.0003, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "eval_accuracy": 0.9745098039215686, | |
| "eval_f1": 0.9745098039215686, | |
| "eval_loss": 0.15970657765865326, | |
| "eval_precision": 0.9745098039215686, | |
| "eval_recall": 0.9745098039215686, | |
| "eval_runtime": 16.8382, | |
| "eval_samples_per_second": 60.576, | |
| "eval_steps_per_second": 7.602, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "grad_norm": 0.0013968138955533504, | |
| "learning_rate": 0.00012383073496659243, | |
| "loss": 0.0005, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "grad_norm": 0.003733535995706916, | |
| "learning_rate": 0.00012338530066815145, | |
| "loss": 0.0003, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "grad_norm": 0.976151168346405, | |
| "learning_rate": 0.00012293986636971045, | |
| "loss": 0.001, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "grad_norm": 0.003409826662391424, | |
| "learning_rate": 0.0001224944320712695, | |
| "loss": 0.0003, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "grad_norm": 0.004897921811789274, | |
| "learning_rate": 0.00012204899777282852, | |
| "loss": 0.0013, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 0.0010848743841052055, | |
| "learning_rate": 0.00012160356347438753, | |
| "loss": 0.0003, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "grad_norm": 0.0013221738627180457, | |
| "learning_rate": 0.00012115812917594656, | |
| "loss": 0.0002, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "grad_norm": 0.09072667360305786, | |
| "learning_rate": 0.00012071269487750559, | |
| "loss": 0.0029, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "grad_norm": 0.00137105374597013, | |
| "learning_rate": 0.00012026726057906458, | |
| "loss": 0.0007, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "grad_norm": 0.004980257712304592, | |
| "learning_rate": 0.00011982182628062361, | |
| "loss": 0.0003, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "eval_accuracy": 0.9558823529411765, | |
| "eval_f1": 0.9558823529411765, | |
| "eval_loss": 0.25734347105026245, | |
| "eval_precision": 0.9558823529411765, | |
| "eval_recall": 0.9558823529411765, | |
| "eval_runtime": 16.754, | |
| "eval_samples_per_second": 60.881, | |
| "eval_steps_per_second": 7.64, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "grad_norm": 0.0013974602334201336, | |
| "learning_rate": 0.00011937639198218265, | |
| "loss": 0.0015, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "grad_norm": 0.0027338312938809395, | |
| "learning_rate": 0.00011893095768374165, | |
| "loss": 0.0028, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "grad_norm": 0.0018153024138882756, | |
| "learning_rate": 0.00011848552338530068, | |
| "loss": 0.0003, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "grad_norm": 0.0008053297642618418, | |
| "learning_rate": 0.00011804008908685969, | |
| "loss": 0.0145, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "grad_norm": 0.0016097394982352853, | |
| "learning_rate": 0.00011759465478841872, | |
| "loss": 0.0003, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "grad_norm": 0.0011555146193131804, | |
| "learning_rate": 0.00011714922048997774, | |
| "loss": 0.0007, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "grad_norm": 0.0013847779482603073, | |
| "learning_rate": 0.00011670378619153674, | |
| "loss": 0.0005, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "grad_norm": 0.026971347630023956, | |
| "learning_rate": 0.00011625835189309577, | |
| "loss": 0.04, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "grad_norm": 0.004224107600748539, | |
| "learning_rate": 0.0001158129175946548, | |
| "loss": 0.0004, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "grad_norm": 0.008265385404229164, | |
| "learning_rate": 0.00011536748329621381, | |
| "loss": 0.0005, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "eval_accuracy": 0.9666666666666667, | |
| "eval_f1": 0.9666666666666667, | |
| "eval_loss": 0.19066497683525085, | |
| "eval_precision": 0.9666666666666667, | |
| "eval_recall": 0.9666666666666667, | |
| "eval_runtime": 16.876, | |
| "eval_samples_per_second": 60.441, | |
| "eval_steps_per_second": 7.585, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "grad_norm": 0.0009556623990647495, | |
| "learning_rate": 0.00011492204899777283, | |
| "loss": 0.0103, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "grad_norm": 0.0008651684038341045, | |
| "learning_rate": 0.00011447661469933186, | |
| "loss": 0.0003, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "grad_norm": 0.0021915507968515158, | |
| "learning_rate": 0.00011403118040089087, | |
| "loss": 0.0017, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "grad_norm": 0.993601381778717, | |
| "learning_rate": 0.0001135857461024499, | |
| "loss": 0.0016, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "grad_norm": 0.012279433198273182, | |
| "learning_rate": 0.00011314031180400893, | |
| "loss": 0.0004, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "grad_norm": 0.0192144475877285, | |
| "learning_rate": 0.00011269487750556793, | |
| "loss": 0.0211, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "grad_norm": 0.002850558841601014, | |
| "learning_rate": 0.00011224944320712695, | |
| "loss": 0.0011, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "grad_norm": 0.016757028177380562, | |
| "learning_rate": 0.00011180400890868597, | |
| "loss": 0.0004, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "grad_norm": 0.009729539044201374, | |
| "learning_rate": 0.00011135857461024499, | |
| "loss": 0.0005, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "grad_norm": 0.0012516066199168563, | |
| "learning_rate": 0.00011091314031180402, | |
| "loss": 0.0741, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "eval_accuracy": 0.9637254901960784, | |
| "eval_f1": 0.9637254901960784, | |
| "eval_loss": 0.20377103984355927, | |
| "eval_precision": 0.9637254901960784, | |
| "eval_recall": 0.9637254901960784, | |
| "eval_runtime": 16.9265, | |
| "eval_samples_per_second": 60.26, | |
| "eval_steps_per_second": 7.562, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "grad_norm": 0.03771669417619705, | |
| "learning_rate": 0.00011046770601336303, | |
| "loss": 0.0005, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "grad_norm": 0.0029582425486296415, | |
| "learning_rate": 0.00011002227171492206, | |
| "loss": 0.044, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "grad_norm": 0.0017603106098249555, | |
| "learning_rate": 0.00010957683741648108, | |
| "loss": 0.0002, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "grad_norm": 0.006093455944210291, | |
| "learning_rate": 0.00010913140311804008, | |
| "loss": 0.0004, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "grad_norm": 0.024271611124277115, | |
| "learning_rate": 0.00010868596881959911, | |
| "loss": 0.0004, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "grad_norm": 0.0059431749396026134, | |
| "learning_rate": 0.00010824053452115814, | |
| "loss": 0.0171, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "grad_norm": 0.001350950333289802, | |
| "learning_rate": 0.00010779510022271715, | |
| "loss": 0.0169, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "grad_norm": 0.11208397895097733, | |
| "learning_rate": 0.00010734966592427618, | |
| "loss": 0.0009, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "grad_norm": 0.000902643718291074, | |
| "learning_rate": 0.0001069042316258352, | |
| "loss": 0.0002, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "grad_norm": 0.0008040807442739606, | |
| "learning_rate": 0.00010645879732739422, | |
| "loss": 0.0025, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "eval_accuracy": 0.9647058823529412, | |
| "eval_f1": 0.9647058823529412, | |
| "eval_loss": 0.1929028332233429, | |
| "eval_precision": 0.9647058823529412, | |
| "eval_recall": 0.9647058823529412, | |
| "eval_runtime": 17.2457, | |
| "eval_samples_per_second": 59.145, | |
| "eval_steps_per_second": 7.422, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "grad_norm": 0.001127121620811522, | |
| "learning_rate": 0.00010601336302895324, | |
| "loss": 0.0025, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "grad_norm": 0.0010411780094727874, | |
| "learning_rate": 0.00010556792873051224, | |
| "loss": 0.0002, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "grad_norm": 0.0012262547388672829, | |
| "learning_rate": 0.00010512249443207127, | |
| "loss": 0.0031, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "grad_norm": 0.06668848544359207, | |
| "learning_rate": 0.0001046770601336303, | |
| "loss": 0.0011, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "grad_norm": 11.083710670471191, | |
| "learning_rate": 0.00010423162583518931, | |
| "loss": 0.1101, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "grad_norm": 0.0041348133236169815, | |
| "learning_rate": 0.00010378619153674833, | |
| "loss": 0.0312, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "grad_norm": 0.0014749247347936034, | |
| "learning_rate": 0.00010334075723830736, | |
| "loss": 0.002, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "grad_norm": 0.00816721748560667, | |
| "learning_rate": 0.00010289532293986637, | |
| "loss": 0.0374, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "grad_norm": 0.0029881075024604797, | |
| "learning_rate": 0.0001024498886414254, | |
| "loss": 0.0245, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "grad_norm": 0.01441921480000019, | |
| "learning_rate": 0.00010200445434298443, | |
| "loss": 0.0293, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "eval_accuracy": 0.9607843137254902, | |
| "eval_f1": 0.9607843137254902, | |
| "eval_loss": 0.17395375669002533, | |
| "eval_precision": 0.9607843137254902, | |
| "eval_recall": 0.9607843137254902, | |
| "eval_runtime": 17.1376, | |
| "eval_samples_per_second": 59.518, | |
| "eval_steps_per_second": 7.469, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "grad_norm": 0.005346087273210287, | |
| "learning_rate": 0.00010155902004454343, | |
| "loss": 0.054, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "grad_norm": 0.0087255435064435, | |
| "learning_rate": 0.00010111358574610245, | |
| "loss": 0.0008, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "grad_norm": 0.0033257934264838696, | |
| "learning_rate": 0.00010066815144766148, | |
| "loss": 0.0034, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "grad_norm": 0.0017741642659530044, | |
| "learning_rate": 0.00010022271714922049, | |
| "loss": 0.0008, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "grad_norm": 0.03513794392347336, | |
| "learning_rate": 9.977728285077952e-05, | |
| "loss": 0.0064, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "grad_norm": 0.0020874643232673407, | |
| "learning_rate": 9.933184855233853e-05, | |
| "loss": 0.0003, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "grad_norm": 0.0035891502629965544, | |
| "learning_rate": 9.888641425389756e-05, | |
| "loss": 0.0489, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "grad_norm": 0.001030069775879383, | |
| "learning_rate": 9.844097995545658e-05, | |
| "loss": 0.0011, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "grad_norm": 0.053950581699609756, | |
| "learning_rate": 9.79955456570156e-05, | |
| "loss": 0.0016, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "grad_norm": 0.0023863562382757664, | |
| "learning_rate": 9.755011135857461e-05, | |
| "loss": 0.0003, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "eval_accuracy": 0.9568627450980393, | |
| "eval_f1": 0.9568627450980393, | |
| "eval_loss": 0.25984036922454834, | |
| "eval_precision": 0.9568627450980393, | |
| "eval_recall": 0.9568627450980393, | |
| "eval_runtime": 17.3202, | |
| "eval_samples_per_second": 58.891, | |
| "eval_steps_per_second": 7.39, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "grad_norm": 0.054378170520067215, | |
| "learning_rate": 9.710467706013364e-05, | |
| "loss": 0.0005, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "grad_norm": 0.003204792272299528, | |
| "learning_rate": 9.665924276169266e-05, | |
| "loss": 0.0485, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "grad_norm": 0.001054179621860385, | |
| "learning_rate": 9.621380846325168e-05, | |
| "loss": 0.0456, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "grad_norm": 0.0008275459986180067, | |
| "learning_rate": 9.576837416481069e-05, | |
| "loss": 0.0003, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "grad_norm": 0.06051745265722275, | |
| "learning_rate": 9.532293986636972e-05, | |
| "loss": 0.0005, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "grad_norm": 0.0037743300199508667, | |
| "learning_rate": 9.487750556792874e-05, | |
| "loss": 0.0034, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "grad_norm": 0.00043078724411316216, | |
| "learning_rate": 9.443207126948775e-05, | |
| "loss": 0.0004, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "grad_norm": 0.001160395797342062, | |
| "learning_rate": 9.398663697104677e-05, | |
| "loss": 0.0009, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "grad_norm": 0.0006127849337644875, | |
| "learning_rate": 9.35412026726058e-05, | |
| "loss": 0.0001, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "grad_norm": 0.0023824446834623814, | |
| "learning_rate": 9.309576837416482e-05, | |
| "loss": 0.0037, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "eval_accuracy": 0.961764705882353, | |
| "eval_f1": 0.961764705882353, | |
| "eval_loss": 0.17718201875686646, | |
| "eval_precision": 0.961764705882353, | |
| "eval_recall": 0.961764705882353, | |
| "eval_runtime": 17.3617, | |
| "eval_samples_per_second": 58.75, | |
| "eval_steps_per_second": 7.373, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "grad_norm": 0.0015472627710551023, | |
| "learning_rate": 9.265033407572383e-05, | |
| "loss": 0.0132, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 5.39, | |
| "grad_norm": 0.002203689655289054, | |
| "learning_rate": 9.220489977728286e-05, | |
| "loss": 0.0003, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "grad_norm": 0.003569718450307846, | |
| "learning_rate": 9.175946547884187e-05, | |
| "loss": 0.0006, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "grad_norm": 0.0020932252518832684, | |
| "learning_rate": 9.13140311804009e-05, | |
| "loss": 0.0051, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "grad_norm": 0.0030166106298565865, | |
| "learning_rate": 9.086859688195991e-05, | |
| "loss": 0.0008, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "grad_norm": 0.001667293719947338, | |
| "learning_rate": 9.042316258351894e-05, | |
| "loss": 0.0004, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "grad_norm": 0.0005573901580646634, | |
| "learning_rate": 8.997772828507795e-05, | |
| "loss": 0.0097, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "grad_norm": 0.0013365427730605006, | |
| "learning_rate": 8.953229398663698e-05, | |
| "loss": 0.0002, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "grad_norm": 0.0031586128752678633, | |
| "learning_rate": 8.9086859688196e-05, | |
| "loss": 0.0022, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "grad_norm": 0.21919859945774078, | |
| "learning_rate": 8.864142538975502e-05, | |
| "loss": 0.0213, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "eval_accuracy": 0.9519607843137254, | |
| "eval_f1": 0.9519607843137254, | |
| "eval_loss": 0.2910812199115753, | |
| "eval_precision": 0.9519607843137254, | |
| "eval_recall": 0.9519607843137254, | |
| "eval_runtime": 17.3785, | |
| "eval_samples_per_second": 58.693, | |
| "eval_steps_per_second": 7.365, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "grad_norm": 0.002107376931235194, | |
| "learning_rate": 8.819599109131403e-05, | |
| "loss": 0.001, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "grad_norm": 11.032261848449707, | |
| "learning_rate": 8.775055679287306e-05, | |
| "loss": 0.0331, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "grad_norm": 0.0010789623484015465, | |
| "learning_rate": 8.730512249443208e-05, | |
| "loss": 0.0002, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "grad_norm": 0.0013054576702415943, | |
| "learning_rate": 8.68596881959911e-05, | |
| "loss": 0.0019, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "grad_norm": 0.001707877148874104, | |
| "learning_rate": 8.641425389755011e-05, | |
| "loss": 0.0004, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "grad_norm": 0.0006996811716817319, | |
| "learning_rate": 8.596881959910914e-05, | |
| "loss": 0.0172, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "grad_norm": 0.00302655971609056, | |
| "learning_rate": 8.552338530066816e-05, | |
| "loss": 0.0461, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "grad_norm": 0.0005682900664396584, | |
| "learning_rate": 8.507795100222718e-05, | |
| "loss": 0.0002, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "grad_norm": 0.0009555260185152292, | |
| "learning_rate": 8.463251670378619e-05, | |
| "loss": 0.0002, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "grad_norm": 0.020968729630112648, | |
| "learning_rate": 8.418708240534521e-05, | |
| "loss": 0.027, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "eval_accuracy": 0.9519607843137254, | |
| "eval_f1": 0.9519607843137254, | |
| "eval_loss": 0.25403299927711487, | |
| "eval_precision": 0.9519607843137254, | |
| "eval_recall": 0.9519607843137254, | |
| "eval_runtime": 17.2499, | |
| "eval_samples_per_second": 59.131, | |
| "eval_steps_per_second": 7.42, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "grad_norm": 0.10158411413431168, | |
| "learning_rate": 8.374164810690424e-05, | |
| "loss": 0.0003, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "grad_norm": 0.016370078548789024, | |
| "learning_rate": 8.329621380846325e-05, | |
| "loss": 0.0981, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "grad_norm": 0.0012518821749836206, | |
| "learning_rate": 8.285077951002228e-05, | |
| "loss": 0.0004, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "grad_norm": 0.0029411010909825563, | |
| "learning_rate": 8.24053452115813e-05, | |
| "loss": 0.0007, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "grad_norm": 0.000937216158490628, | |
| "learning_rate": 8.195991091314032e-05, | |
| "loss": 0.0177, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "grad_norm": 0.0034318570978939533, | |
| "learning_rate": 8.151447661469933e-05, | |
| "loss": 0.0312, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "grad_norm": 0.0007262133876793087, | |
| "learning_rate": 8.106904231625836e-05, | |
| "loss": 0.0002, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "grad_norm": 0.0007804339984431863, | |
| "learning_rate": 8.062360801781737e-05, | |
| "loss": 0.0002, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "grad_norm": 0.001342720352113247, | |
| "learning_rate": 8.01781737193764e-05, | |
| "loss": 0.0002, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "grad_norm": 0.001933095627464354, | |
| "learning_rate": 7.973273942093543e-05, | |
| "loss": 0.0155, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "eval_accuracy": 0.9549019607843138, | |
| "eval_f1": 0.9549019607843138, | |
| "eval_loss": 0.22524712979793549, | |
| "eval_precision": 0.9549019607843138, | |
| "eval_recall": 0.9549019607843138, | |
| "eval_runtime": 17.4105, | |
| "eval_samples_per_second": 58.585, | |
| "eval_steps_per_second": 7.352, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "grad_norm": 0.0011171975638717413, | |
| "learning_rate": 7.928730512249444e-05, | |
| "loss": 0.0002, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "grad_norm": 0.001169139752164483, | |
| "learning_rate": 7.884187082405345e-05, | |
| "loss": 0.0002, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "grad_norm": 0.002055455232039094, | |
| "learning_rate": 7.839643652561248e-05, | |
| "loss": 0.0082, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "grad_norm": 0.05008271709084511, | |
| "learning_rate": 7.79510022271715e-05, | |
| "loss": 0.0321, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "grad_norm": 0.000607622554525733, | |
| "learning_rate": 7.750556792873052e-05, | |
| "loss": 0.0397, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "grad_norm": 0.000795868574641645, | |
| "learning_rate": 7.706013363028953e-05, | |
| "loss": 0.0064, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "grad_norm": 0.010306187905371189, | |
| "learning_rate": 7.661469933184856e-05, | |
| "loss": 0.004, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "grad_norm": 0.001497789635322988, | |
| "learning_rate": 7.616926503340758e-05, | |
| "loss": 0.005, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "grad_norm": 0.12043255567550659, | |
| "learning_rate": 7.57238307349666e-05, | |
| "loss": 0.003, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "grad_norm": 0.0034782905131578445, | |
| "learning_rate": 7.527839643652561e-05, | |
| "loss": 0.0002, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "eval_accuracy": 0.9431372549019608, | |
| "eval_f1": 0.9431372549019608, | |
| "eval_loss": 0.3040062189102173, | |
| "eval_precision": 0.9431372549019608, | |
| "eval_recall": 0.9431372549019608, | |
| "eval_runtime": 17.1818, | |
| "eval_samples_per_second": 59.365, | |
| "eval_steps_per_second": 7.45, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "grad_norm": 5.399389266967773, | |
| "learning_rate": 7.483296213808464e-05, | |
| "loss": 0.0092, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "grad_norm": 0.0007157580694183707, | |
| "learning_rate": 7.438752783964366e-05, | |
| "loss": 0.0134, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "grad_norm": 0.005470567848533392, | |
| "learning_rate": 7.394209354120267e-05, | |
| "loss": 0.147, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "grad_norm": 0.01675906591117382, | |
| "learning_rate": 7.34966592427617e-05, | |
| "loss": 0.0813, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "grad_norm": 5.728336334228516, | |
| "learning_rate": 7.305122494432071e-05, | |
| "loss": 0.0275, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "grad_norm": 0.003522884799167514, | |
| "learning_rate": 7.260579064587974e-05, | |
| "loss": 0.0109, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "grad_norm": 0.026970118284225464, | |
| "learning_rate": 7.216035634743875e-05, | |
| "loss": 0.0004, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "grad_norm": 0.12984509766101837, | |
| "learning_rate": 7.171492204899778e-05, | |
| "loss": 0.0007, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "grad_norm": 0.009168056771159172, | |
| "learning_rate": 7.126948775055679e-05, | |
| "loss": 0.0006, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "grad_norm": 0.0009597400785423815, | |
| "learning_rate": 7.082405345211582e-05, | |
| "loss": 0.011, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "eval_accuracy": 0.9598039215686275, | |
| "eval_f1": 0.9598039215686275, | |
| "eval_loss": 0.1923176795244217, | |
| "eval_precision": 0.9598039215686275, | |
| "eval_recall": 0.9598039215686275, | |
| "eval_runtime": 17.225, | |
| "eval_samples_per_second": 59.216, | |
| "eval_steps_per_second": 7.431, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "grad_norm": 0.0009739417000673711, | |
| "learning_rate": 7.037861915367485e-05, | |
| "loss": 0.0014, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "grad_norm": 0.0022935476154088974, | |
| "learning_rate": 6.993318485523386e-05, | |
| "loss": 0.0133, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "grad_norm": 0.0005638069123961031, | |
| "learning_rate": 6.948775055679287e-05, | |
| "loss": 0.0009, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "grad_norm": 0.014625852927565575, | |
| "learning_rate": 6.904231625835188e-05, | |
| "loss": 0.0006, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "grad_norm": 0.001474756863899529, | |
| "learning_rate": 6.859688195991092e-05, | |
| "loss": 0.0128, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "grad_norm": 0.0029620621353387833, | |
| "learning_rate": 6.815144766146994e-05, | |
| "loss": 0.0002, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "grad_norm": 0.0016939816996455193, | |
| "learning_rate": 6.770601336302895e-05, | |
| "loss": 0.0003, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "grad_norm": 0.0009252108866348863, | |
| "learning_rate": 6.726057906458798e-05, | |
| "loss": 0.0011, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "grad_norm": 0.1285027265548706, | |
| "learning_rate": 6.6815144766147e-05, | |
| "loss": 0.0071, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "grad_norm": 0.00125114805996418, | |
| "learning_rate": 6.636971046770602e-05, | |
| "loss": 0.0006, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "eval_accuracy": 0.9637254901960784, | |
| "eval_f1": 0.9637254901960784, | |
| "eval_loss": 0.20890936255455017, | |
| "eval_precision": 0.9637254901960784, | |
| "eval_recall": 0.9637254901960784, | |
| "eval_runtime": 17.1679, | |
| "eval_samples_per_second": 59.413, | |
| "eval_steps_per_second": 7.456, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "grad_norm": 0.0017677777213975787, | |
| "learning_rate": 6.592427616926503e-05, | |
| "loss": 0.03, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "grad_norm": 0.0006067939684726298, | |
| "learning_rate": 6.547884187082406e-05, | |
| "loss": 0.0003, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "grad_norm": 0.0010102881351485848, | |
| "learning_rate": 6.503340757238308e-05, | |
| "loss": 0.0002, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "grad_norm": 0.0009144017240032554, | |
| "learning_rate": 6.45879732739421e-05, | |
| "loss": 0.0282, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "grad_norm": 0.0007274287054315209, | |
| "learning_rate": 6.414253897550112e-05, | |
| "loss": 0.0002, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "grad_norm": 0.0034935837611556053, | |
| "learning_rate": 6.369710467706013e-05, | |
| "loss": 0.0046, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "grad_norm": 0.004235483705997467, | |
| "learning_rate": 6.325167037861916e-05, | |
| "loss": 0.0002, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "grad_norm": 0.0013753636740148067, | |
| "learning_rate": 6.280623608017817e-05, | |
| "loss": 0.0101, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "grad_norm": 0.0008035491337068379, | |
| "learning_rate": 6.23608017817372e-05, | |
| "loss": 0.0002, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "grad_norm": 0.00173095241189003, | |
| "learning_rate": 6.191536748329621e-05, | |
| "loss": 0.0002, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "eval_accuracy": 0.957843137254902, | |
| "eval_f1": 0.957843137254902, | |
| "eval_loss": 0.22062458097934723, | |
| "eval_precision": 0.957843137254902, | |
| "eval_recall": 0.957843137254902, | |
| "eval_runtime": 17.3063, | |
| "eval_samples_per_second": 58.938, | |
| "eval_steps_per_second": 7.396, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "grad_norm": 0.001496517681516707, | |
| "learning_rate": 6.146993318485523e-05, | |
| "loss": 0.0002, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "grad_norm": 0.0005152082885615528, | |
| "learning_rate": 6.102449888641426e-05, | |
| "loss": 0.0002, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "grad_norm": 0.0005918457172811031, | |
| "learning_rate": 6.057906458797328e-05, | |
| "loss": 0.0192, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "grad_norm": 0.000515251827891916, | |
| "learning_rate": 6.013363028953229e-05, | |
| "loss": 0.0003, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "grad_norm": 0.00043858023127540946, | |
| "learning_rate": 5.9688195991091325e-05, | |
| "loss": 0.0105, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "grad_norm": 0.0018106413772329688, | |
| "learning_rate": 5.924276169265034e-05, | |
| "loss": 0.0059, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "grad_norm": 0.000563229201361537, | |
| "learning_rate": 5.879732739420936e-05, | |
| "loss": 0.0003, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "grad_norm": 0.001515958341769874, | |
| "learning_rate": 5.835189309576837e-05, | |
| "loss": 0.0058, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "grad_norm": 0.0005047390004619956, | |
| "learning_rate": 5.79064587973274e-05, | |
| "loss": 0.0002, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "grad_norm": 0.03174121677875519, | |
| "learning_rate": 5.746102449888642e-05, | |
| "loss": 0.0006, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "eval_accuracy": 0.9627450980392157, | |
| "eval_f1": 0.9627450980392157, | |
| "eval_loss": 0.22668223083019257, | |
| "eval_precision": 0.9627450980392157, | |
| "eval_recall": 0.9627450980392157, | |
| "eval_runtime": 16.905, | |
| "eval_samples_per_second": 60.337, | |
| "eval_steps_per_second": 7.572, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "grad_norm": 0.0019246222218498588, | |
| "learning_rate": 5.701559020044544e-05, | |
| "loss": 0.0002, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "grad_norm": 8.673022270202637, | |
| "learning_rate": 5.6570155902004463e-05, | |
| "loss": 0.0058, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "grad_norm": 0.0006804656004533172, | |
| "learning_rate": 5.6124721603563476e-05, | |
| "loss": 0.0002, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "grad_norm": 0.0013651803601533175, | |
| "learning_rate": 5.5679287305122496e-05, | |
| "loss": 0.0007, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "grad_norm": 0.0014620161382481456, | |
| "learning_rate": 5.5233853006681516e-05, | |
| "loss": 0.0002, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "grad_norm": 0.0009020831785164773, | |
| "learning_rate": 5.478841870824054e-05, | |
| "loss": 0.0003, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "grad_norm": 0.0010328377829864621, | |
| "learning_rate": 5.4342984409799555e-05, | |
| "loss": 0.0016, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "grad_norm": 0.002698230091482401, | |
| "learning_rate": 5.3897550111358575e-05, | |
| "loss": 0.0002, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "grad_norm": 0.0025662758853286505, | |
| "learning_rate": 5.34521158129176e-05, | |
| "loss": 0.0002, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "grad_norm": 0.0005825618281960487, | |
| "learning_rate": 5.300668151447662e-05, | |
| "loss": 0.0001, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "eval_accuracy": 0.9637254901960784, | |
| "eval_f1": 0.9637254901960784, | |
| "eval_loss": 0.17346832156181335, | |
| "eval_precision": 0.9637254901960784, | |
| "eval_recall": 0.9637254901960784, | |
| "eval_runtime": 17.0715, | |
| "eval_samples_per_second": 59.749, | |
| "eval_steps_per_second": 7.498, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "grad_norm": 0.001066404627636075, | |
| "learning_rate": 5.2561247216035634e-05, | |
| "loss": 0.0001, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "grad_norm": 0.0007172970799729228, | |
| "learning_rate": 5.2115812917594654e-05, | |
| "loss": 0.0002, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "grad_norm": 0.000634915370028466, | |
| "learning_rate": 5.167037861915368e-05, | |
| "loss": 0.0001, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "grad_norm": 0.004406619351357222, | |
| "learning_rate": 5.12249443207127e-05, | |
| "loss": 0.0001, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "grad_norm": 0.015614562667906284, | |
| "learning_rate": 5.077951002227171e-05, | |
| "loss": 0.0002, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "grad_norm": 0.0017906671855598688, | |
| "learning_rate": 5.033407572383074e-05, | |
| "loss": 0.0003, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 7.51, | |
| "grad_norm": 0.002051855204626918, | |
| "learning_rate": 4.988864142538976e-05, | |
| "loss": 0.0003, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "grad_norm": 0.0007296734838746488, | |
| "learning_rate": 4.944320712694878e-05, | |
| "loss": 0.0001, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "grad_norm": 0.0005030659376643598, | |
| "learning_rate": 4.89977728285078e-05, | |
| "loss": 0.001, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "grad_norm": 0.00040412909584119916, | |
| "learning_rate": 4.855233853006682e-05, | |
| "loss": 0.0001, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "eval_accuracy": 0.9686274509803922, | |
| "eval_f1": 0.9686274509803922, | |
| "eval_loss": 0.16111387312412262, | |
| "eval_precision": 0.9686274509803922, | |
| "eval_recall": 0.9686274509803922, | |
| "eval_runtime": 16.979, | |
| "eval_samples_per_second": 60.074, | |
| "eval_steps_per_second": 7.539, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "grad_norm": 0.0005902125267311931, | |
| "learning_rate": 4.810690423162584e-05, | |
| "loss": 0.0002, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 7.62, | |
| "grad_norm": 0.0005297433235682547, | |
| "learning_rate": 4.766146993318486e-05, | |
| "loss": 0.0002, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "grad_norm": 0.0005097580142319202, | |
| "learning_rate": 4.721603563474388e-05, | |
| "loss": 0.0001, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "grad_norm": 0.0006824088632129133, | |
| "learning_rate": 4.67706013363029e-05, | |
| "loss": 0.0002, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "grad_norm": 0.0007693713996559381, | |
| "learning_rate": 4.632516703786192e-05, | |
| "loss": 0.0002, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "grad_norm": 0.0010686744935810566, | |
| "learning_rate": 4.5879732739420936e-05, | |
| "loss": 0.0001, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 7.73, | |
| "grad_norm": 0.0008867672295309603, | |
| "learning_rate": 4.5434298440979956e-05, | |
| "loss": 0.0001, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 7.75, | |
| "grad_norm": 0.029097959399223328, | |
| "learning_rate": 4.4988864142538976e-05, | |
| "loss": 0.0002, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "grad_norm": 1.0295618772506714, | |
| "learning_rate": 4.4543429844098e-05, | |
| "loss": 0.0232, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "grad_norm": 0.01833498664200306, | |
| "learning_rate": 4.4097995545657015e-05, | |
| "loss": 0.0003, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "eval_accuracy": 0.9676470588235294, | |
| "eval_f1": 0.9676470588235294, | |
| "eval_loss": 0.15838229656219482, | |
| "eval_precision": 0.9676470588235294, | |
| "eval_recall": 0.9676470588235294, | |
| "eval_runtime": 17.1614, | |
| "eval_samples_per_second": 59.436, | |
| "eval_steps_per_second": 7.459, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "grad_norm": 0.00047049217391759157, | |
| "learning_rate": 4.365256124721604e-05, | |
| "loss": 0.0002, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "grad_norm": 0.0005326452082954347, | |
| "learning_rate": 4.3207126948775055e-05, | |
| "loss": 0.0001, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "grad_norm": 0.004704196471720934, | |
| "learning_rate": 4.276169265033408e-05, | |
| "loss": 0.0001, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "grad_norm": 0.0015603323699906468, | |
| "learning_rate": 4.2316258351893094e-05, | |
| "loss": 0.0001, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "grad_norm": 0.001259263837710023, | |
| "learning_rate": 4.187082405345212e-05, | |
| "loss": 0.0002, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "grad_norm": 0.0009968471713364124, | |
| "learning_rate": 4.142538975501114e-05, | |
| "loss": 0.0173, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 7.95, | |
| "grad_norm": 0.0011363897938281298, | |
| "learning_rate": 4.097995545657016e-05, | |
| "loss": 0.0002, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 7.97, | |
| "grad_norm": 0.000970890570897609, | |
| "learning_rate": 4.053452115812918e-05, | |
| "loss": 0.0002, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.0004184432327747345, | |
| "learning_rate": 4.00890868596882e-05, | |
| "loss": 0.0002, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "grad_norm": 0.0016488181427121162, | |
| "learning_rate": 3.964365256124722e-05, | |
| "loss": 0.0001, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "eval_accuracy": 0.9715686274509804, | |
| "eval_f1": 0.9715686274509804, | |
| "eval_loss": 0.15909050405025482, | |
| "eval_precision": 0.9715686274509804, | |
| "eval_recall": 0.9715686274509804, | |
| "eval_runtime": 17.2363, | |
| "eval_samples_per_second": 59.178, | |
| "eval_steps_per_second": 7.426, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "grad_norm": 0.0007161149405874312, | |
| "learning_rate": 3.919821826280624e-05, | |
| "loss": 0.0001, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "grad_norm": 0.025815103203058243, | |
| "learning_rate": 3.875278396436526e-05, | |
| "loss": 0.0002, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "grad_norm": 0.6380942463874817, | |
| "learning_rate": 3.830734966592428e-05, | |
| "loss": 0.0067, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 8.11, | |
| "grad_norm": 0.003612744389101863, | |
| "learning_rate": 3.78619153674833e-05, | |
| "loss": 0.0004, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 8.13, | |
| "grad_norm": 0.002143553225323558, | |
| "learning_rate": 3.741648106904232e-05, | |
| "loss": 0.0002, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "grad_norm": 0.001735298428684473, | |
| "learning_rate": 3.697104677060134e-05, | |
| "loss": 0.0002, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "grad_norm": 0.00038884536479599774, | |
| "learning_rate": 3.652561247216036e-05, | |
| "loss": 0.0004, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 8.2, | |
| "grad_norm": 0.0004352598334662616, | |
| "learning_rate": 3.608017817371938e-05, | |
| "loss": 0.0001, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "grad_norm": 0.0030426643788814545, | |
| "learning_rate": 3.5634743875278396e-05, | |
| "loss": 0.0002, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 8.24, | |
| "grad_norm": 0.0004016205493826419, | |
| "learning_rate": 3.518930957683742e-05, | |
| "loss": 0.0005, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 8.24, | |
| "eval_accuracy": 0.9705882352941176, | |
| "eval_f1": 0.9705882352941176, | |
| "eval_loss": 0.15964852273464203, | |
| "eval_precision": 0.9705882352941176, | |
| "eval_recall": 0.9705882352941176, | |
| "eval_runtime": 17.4233, | |
| "eval_samples_per_second": 58.542, | |
| "eval_steps_per_second": 7.346, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 8.26, | |
| "grad_norm": 0.0010756178526207805, | |
| "learning_rate": 3.4743875278396436e-05, | |
| "loss": 0.0001, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 8.29, | |
| "grad_norm": 0.002286019967868924, | |
| "learning_rate": 3.429844097995546e-05, | |
| "loss": 0.0001, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "grad_norm": 0.0019246740266680717, | |
| "learning_rate": 3.3853006681514475e-05, | |
| "loss": 0.0001, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "grad_norm": 0.0022740724962204695, | |
| "learning_rate": 3.34075723830735e-05, | |
| "loss": 0.0001, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 8.35, | |
| "grad_norm": 0.0003697601496241987, | |
| "learning_rate": 3.2962138084632515e-05, | |
| "loss": 0.0001, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "grad_norm": 0.00033845697180368006, | |
| "learning_rate": 3.251670378619154e-05, | |
| "loss": 0.0001, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "grad_norm": 0.0004189737082924694, | |
| "learning_rate": 3.207126948775056e-05, | |
| "loss": 0.0001, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 8.42, | |
| "grad_norm": 0.0008992131915874779, | |
| "learning_rate": 3.162583518930958e-05, | |
| "loss": 0.0004, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 8.44, | |
| "grad_norm": 0.0008794433670118451, | |
| "learning_rate": 3.11804008908686e-05, | |
| "loss": 0.0001, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "grad_norm": 0.0017194098327308893, | |
| "learning_rate": 3.073496659242761e-05, | |
| "loss": 0.0002, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "eval_accuracy": 0.9715686274509804, | |
| "eval_f1": 0.9715686274509804, | |
| "eval_loss": 0.15634377300739288, | |
| "eval_precision": 0.9715686274509804, | |
| "eval_recall": 0.9715686274509804, | |
| "eval_runtime": 17.0753, | |
| "eval_samples_per_second": 59.735, | |
| "eval_steps_per_second": 7.496, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 8.49, | |
| "grad_norm": 0.0009110970422625542, | |
| "learning_rate": 3.028953229398664e-05, | |
| "loss": 0.0002, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 8.51, | |
| "grad_norm": 0.0010168278822675347, | |
| "learning_rate": 2.9844097995545663e-05, | |
| "loss": 0.0001, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 8.53, | |
| "grad_norm": 0.0005708567332476377, | |
| "learning_rate": 2.939866369710468e-05, | |
| "loss": 0.0001, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 8.55, | |
| "grad_norm": 0.0033207752276211977, | |
| "learning_rate": 2.89532293986637e-05, | |
| "loss": 0.0001, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 8.57, | |
| "grad_norm": 0.0010962020605802536, | |
| "learning_rate": 2.850779510022272e-05, | |
| "loss": 0.0001, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "grad_norm": 0.0008160584839060903, | |
| "learning_rate": 2.8062360801781738e-05, | |
| "loss": 0.0001, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 8.62, | |
| "grad_norm": 0.001714337500743568, | |
| "learning_rate": 2.7616926503340758e-05, | |
| "loss": 0.0001, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "grad_norm": 0.002063535852357745, | |
| "learning_rate": 2.7171492204899778e-05, | |
| "loss": 0.0001, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "grad_norm": 0.0010866498341783881, | |
| "learning_rate": 2.67260579064588e-05, | |
| "loss": 0.0001, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 8.69, | |
| "grad_norm": 0.0009104039054363966, | |
| "learning_rate": 2.6280623608017817e-05, | |
| "loss": 0.0002, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 8.69, | |
| "eval_accuracy": 0.9715686274509804, | |
| "eval_f1": 0.9715686274509804, | |
| "eval_loss": 0.15503399074077606, | |
| "eval_precision": 0.9715686274509804, | |
| "eval_recall": 0.9715686274509804, | |
| "eval_runtime": 17.3241, | |
| "eval_samples_per_second": 58.877, | |
| "eval_steps_per_second": 7.389, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 8.71, | |
| "grad_norm": 0.0029284367337822914, | |
| "learning_rate": 2.583518930957684e-05, | |
| "loss": 0.007, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "grad_norm": 0.0005087658646516502, | |
| "learning_rate": 2.5389755011135856e-05, | |
| "loss": 0.0001, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "grad_norm": 0.0005650007515214384, | |
| "learning_rate": 2.494432071269488e-05, | |
| "loss": 0.0001, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 8.78, | |
| "grad_norm": 0.0005321349017322063, | |
| "learning_rate": 2.44988864142539e-05, | |
| "loss": 0.0001, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "grad_norm": 0.0008177491254173219, | |
| "learning_rate": 2.405345211581292e-05, | |
| "loss": 0.0001, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 8.82, | |
| "grad_norm": 0.0005406069685705006, | |
| "learning_rate": 2.360801781737194e-05, | |
| "loss": 0.0001, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 8.84, | |
| "grad_norm": 0.000659614393953234, | |
| "learning_rate": 2.316258351893096e-05, | |
| "loss": 0.0001, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "grad_norm": 0.0004996512434445322, | |
| "learning_rate": 2.2717149220489978e-05, | |
| "loss": 0.0001, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "grad_norm": 0.0002974416420329362, | |
| "learning_rate": 2.2271714922049e-05, | |
| "loss": 0.0001, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 8.91, | |
| "grad_norm": 0.0011179678840562701, | |
| "learning_rate": 2.182628062360802e-05, | |
| "loss": 0.0001, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 8.91, | |
| "eval_accuracy": 0.9705882352941176, | |
| "eval_f1": 0.9705882352941176, | |
| "eval_loss": 0.15417079627513885, | |
| "eval_precision": 0.9705882352941176, | |
| "eval_recall": 0.9705882352941176, | |
| "eval_runtime": 17.0729, | |
| "eval_samples_per_second": 59.744, | |
| "eval_steps_per_second": 7.497, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "grad_norm": 0.0005180141888558865, | |
| "learning_rate": 2.138084632516704e-05, | |
| "loss": 0.0001, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "grad_norm": 0.0005326379905454814, | |
| "learning_rate": 2.093541202672606e-05, | |
| "loss": 0.0001, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "grad_norm": 0.0006433409289456904, | |
| "learning_rate": 2.048997772828508e-05, | |
| "loss": 0.0001, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.002777345711365342, | |
| "learning_rate": 2.00445434298441e-05, | |
| "loss": 0.0001, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "grad_norm": 0.0007074729655869305, | |
| "learning_rate": 1.959910913140312e-05, | |
| "loss": 0.0001, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "grad_norm": 0.001536020776256919, | |
| "learning_rate": 1.915367483296214e-05, | |
| "loss": 0.0001, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 9.06, | |
| "grad_norm": 0.0009424517047591507, | |
| "learning_rate": 1.870824053452116e-05, | |
| "loss": 0.0001, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 9.09, | |
| "grad_norm": 0.3284554183483124, | |
| "learning_rate": 1.826280623608018e-05, | |
| "loss": 0.0026, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 9.11, | |
| "grad_norm": 0.00030903713195584714, | |
| "learning_rate": 1.7817371937639198e-05, | |
| "loss": 0.0001, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "grad_norm": 0.0005440358072519302, | |
| "learning_rate": 1.7371937639198218e-05, | |
| "loss": 0.0001, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "eval_accuracy": 0.9715686274509804, | |
| "eval_f1": 0.9715686274509804, | |
| "eval_loss": 0.15382429957389832, | |
| "eval_precision": 0.9715686274509804, | |
| "eval_recall": 0.9715686274509804, | |
| "eval_runtime": 16.821, | |
| "eval_samples_per_second": 60.638, | |
| "eval_steps_per_second": 7.61, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 9.15, | |
| "grad_norm": 0.0006746925064362586, | |
| "learning_rate": 1.6926503340757238e-05, | |
| "loss": 0.0001, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 9.18, | |
| "grad_norm": 0.00037791216163896024, | |
| "learning_rate": 1.6481069042316257e-05, | |
| "loss": 0.0001, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "grad_norm": 0.0012918213615193963, | |
| "learning_rate": 1.603563474387528e-05, | |
| "loss": 0.0001, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "grad_norm": 0.000723692704923451, | |
| "learning_rate": 1.55902004454343e-05, | |
| "loss": 0.0001, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "grad_norm": 0.0006746066501364112, | |
| "learning_rate": 1.514476614699332e-05, | |
| "loss": 0.0001, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "grad_norm": 0.000450183724751696, | |
| "learning_rate": 1.469933184855234e-05, | |
| "loss": 0.0001, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 9.29, | |
| "grad_norm": 0.0011862111277878284, | |
| "learning_rate": 1.425389755011136e-05, | |
| "loss": 0.0001, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 9.31, | |
| "grad_norm": 0.0017749534454196692, | |
| "learning_rate": 1.3808463251670379e-05, | |
| "loss": 0.0001, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "grad_norm": 0.0003237236524000764, | |
| "learning_rate": 1.33630289532294e-05, | |
| "loss": 0.0001, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "grad_norm": 0.0004740317235700786, | |
| "learning_rate": 1.291759465478842e-05, | |
| "loss": 0.0001, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "eval_accuracy": 0.9715686274509804, | |
| "eval_f1": 0.9715686274509804, | |
| "eval_loss": 0.15357248485088348, | |
| "eval_precision": 0.9715686274509804, | |
| "eval_recall": 0.9715686274509804, | |
| "eval_runtime": 17.097, | |
| "eval_samples_per_second": 59.659, | |
| "eval_steps_per_second": 7.487, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 9.38, | |
| "grad_norm": 0.0004047084948979318, | |
| "learning_rate": 1.247216035634744e-05, | |
| "loss": 0.0001, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "grad_norm": 0.0028567886911332607, | |
| "learning_rate": 1.202672605790646e-05, | |
| "loss": 0.0001, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 9.42, | |
| "grad_norm": 0.0005680415779352188, | |
| "learning_rate": 1.158129175946548e-05, | |
| "loss": 0.0001, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "grad_norm": 0.00213377526961267, | |
| "learning_rate": 1.11358574610245e-05, | |
| "loss": 0.0001, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 9.47, | |
| "grad_norm": 0.001148115610703826, | |
| "learning_rate": 1.069042316258352e-05, | |
| "loss": 0.0001, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 9.49, | |
| "grad_norm": 0.00045941799180582166, | |
| "learning_rate": 1.024498886414254e-05, | |
| "loss": 0.0001, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 9.51, | |
| "grad_norm": 0.0024906108155846596, | |
| "learning_rate": 9.79955456570156e-06, | |
| "loss": 0.0001, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 9.53, | |
| "grad_norm": 0.0004669167974498123, | |
| "learning_rate": 9.35412026726058e-06, | |
| "loss": 0.0001, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 9.55, | |
| "grad_norm": 0.0003813539515249431, | |
| "learning_rate": 8.908685968819599e-06, | |
| "loss": 0.0001, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "grad_norm": 0.00048394210170954466, | |
| "learning_rate": 8.463251670378619e-06, | |
| "loss": 0.0001, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "eval_accuracy": 0.9715686274509804, | |
| "eval_f1": 0.9715686274509804, | |
| "eval_loss": 0.15336920320987701, | |
| "eval_precision": 0.9715686274509804, | |
| "eval_recall": 0.9715686274509804, | |
| "eval_runtime": 16.9378, | |
| "eval_samples_per_second": 60.22, | |
| "eval_steps_per_second": 7.557, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "grad_norm": 0.001094264443963766, | |
| "learning_rate": 8.01781737193764e-06, | |
| "loss": 0.0001, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 9.62, | |
| "grad_norm": 0.0007720951689407229, | |
| "learning_rate": 7.57238307349666e-06, | |
| "loss": 0.0001, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 9.64, | |
| "grad_norm": 0.0010363436304032803, | |
| "learning_rate": 7.12694877505568e-06, | |
| "loss": 0.0001, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "grad_norm": 0.0006155350711196661, | |
| "learning_rate": 6.6815144766147e-06, | |
| "loss": 0.0001, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 9.69, | |
| "grad_norm": 0.002117099007591605, | |
| "learning_rate": 6.23608017817372e-06, | |
| "loss": 0.0001, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 9.71, | |
| "grad_norm": 0.00035223804297856987, | |
| "learning_rate": 5.79064587973274e-06, | |
| "loss": 0.0001, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 9.73, | |
| "grad_norm": 0.0008616923005320132, | |
| "learning_rate": 5.34521158129176e-06, | |
| "loss": 0.0001, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "grad_norm": 0.0003698187065310776, | |
| "learning_rate": 4.89977728285078e-06, | |
| "loss": 0.0001, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 9.78, | |
| "grad_norm": 0.0008464885177090764, | |
| "learning_rate": 4.4543429844097995e-06, | |
| "loss": 0.0001, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 9.8, | |
| "grad_norm": 0.000521197565831244, | |
| "learning_rate": 4.00890868596882e-06, | |
| "loss": 0.0001, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 9.8, | |
| "eval_accuracy": 0.9715686274509804, | |
| "eval_f1": 0.9715686274509804, | |
| "eval_loss": 0.15329474210739136, | |
| "eval_precision": 0.9715686274509804, | |
| "eval_recall": 0.9715686274509804, | |
| "eval_runtime": 17.0026, | |
| "eval_samples_per_second": 59.991, | |
| "eval_steps_per_second": 7.528, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "grad_norm": 0.00078478833893314, | |
| "learning_rate": 3.56347438752784e-06, | |
| "loss": 0.0001, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 9.84, | |
| "grad_norm": 0.00044067302951589227, | |
| "learning_rate": 3.11804008908686e-06, | |
| "loss": 0.0001, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "grad_norm": 0.0005106424796395004, | |
| "learning_rate": 2.67260579064588e-06, | |
| "loss": 0.0001, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 9.89, | |
| "grad_norm": 0.0003234909090679139, | |
| "learning_rate": 2.2271714922048998e-06, | |
| "loss": 0.0001, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 9.91, | |
| "grad_norm": 0.0003544181527104229, | |
| "learning_rate": 1.78173719376392e-06, | |
| "loss": 0.0001, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 9.93, | |
| "grad_norm": 0.00040398509008809924, | |
| "learning_rate": 1.33630289532294e-06, | |
| "loss": 0.0001, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "grad_norm": 0.0009952255059033632, | |
| "learning_rate": 8.9086859688196e-07, | |
| "loss": 0.0001, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 9.98, | |
| "grad_norm": 0.0003712301841005683, | |
| "learning_rate": 4.4543429844098e-07, | |
| "loss": 0.0001, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.0012604963267222047, | |
| "learning_rate": 0.0, | |
| "loss": 0.0099, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 4490, | |
| "total_flos": 5.562769847811564e+18, | |
| "train_loss": 0.025327244219600944, | |
| "train_runtime": 3846.586, | |
| "train_samples_per_second": 18.637, | |
| "train_steps_per_second": 1.167 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 4490, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 100, | |
| "total_flos": 5.562769847811564e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |