{
  "best_global_step": 1800,
  "best_metric": 0.9762746087834427,
  "best_model_checkpoint": "/workspace/AI/Trend_Primus-FineWeb_Filtering-pipeline/securebert_finetuned/defensive_vs_rest/checkpoint-1800",
  "epoch": 3.8181818181818183,
  "eval_steps": 300,
  "global_step": 2100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01818181818181818,
      "grad_norm": 0.4759802222251892,
      "learning_rate": 5.714285714285715e-07,
      "loss": 0.1542,
      "step": 10
    },
    {
      "epoch": 0.03636363636363636,
      "grad_norm": 0.3740444481372833,
      "learning_rate": 1.142857142857143e-06,
      "loss": 0.1623,
      "step": 20
    },
    {
      "epoch": 0.05454545454545454,
      "grad_norm": 0.5404930710792542,
      "learning_rate": 1.7142857142857145e-06,
      "loss": 0.1582,
      "step": 30
    },
    {
      "epoch": 0.07272727272727272,
      "grad_norm": 0.3705693781375885,
      "learning_rate": 2.285714285714286e-06,
      "loss": 0.1422,
      "step": 40
    },
    {
      "epoch": 0.09090909090909091,
      "grad_norm": 0.4376075565814972,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 0.1403,
      "step": 50
    },
    {
      "epoch": 0.10909090909090909,
      "grad_norm": 0.46200892329216003,
      "learning_rate": 3.428571428571429e-06,
      "loss": 0.142,
      "step": 60
    },
    {
      "epoch": 0.12727272727272726,
      "grad_norm": 0.2855919599533081,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.144,
      "step": 70
    },
    {
      "epoch": 0.14545454545454545,
      "grad_norm": 0.33721357583999634,
      "learning_rate": 4.571428571428572e-06,
      "loss": 0.1401,
      "step": 80
    },
    {
      "epoch": 0.16363636363636364,
      "grad_norm": 0.6751205921173096,
      "learning_rate": 5.142857142857142e-06,
      "loss": 0.132,
      "step": 90
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 1.1521556377410889,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 0.1277,
      "step": 100
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.6443154811859131,
      "learning_rate": 6.285714285714286e-06,
      "loss": 0.1079,
      "step": 110
    },
    {
      "epoch": 0.21818181818181817,
      "grad_norm": 0.6698077321052551,
      "learning_rate": 6.857142857142858e-06,
      "loss": 0.1066,
      "step": 120
    },
    {
      "epoch": 0.23636363636363636,
      "grad_norm": 0.8653299808502197,
      "learning_rate": 7.428571428571429e-06,
      "loss": 0.0943,
      "step": 130
    },
    {
      "epoch": 0.2545454545454545,
      "grad_norm": 1.1476327180862427,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.0924,
      "step": 140
    },
    {
      "epoch": 0.2727272727272727,
      "grad_norm": 0.5096330642700195,
      "learning_rate": 8.571428571428571e-06,
      "loss": 0.0844,
      "step": 150
    },
    {
      "epoch": 0.2909090909090909,
      "grad_norm": 1.1907318830490112,
      "learning_rate": 9.142857142857144e-06,
      "loss": 0.0813,
      "step": 160
    },
    {
      "epoch": 0.3090909090909091,
      "grad_norm": 1.299401879310608,
      "learning_rate": 9.714285714285715e-06,
      "loss": 0.0749,
      "step": 170
    },
    {
      "epoch": 0.32727272727272727,
      "grad_norm": 0.8548530340194702,
      "learning_rate": 1.0285714285714285e-05,
      "loss": 0.0661,
      "step": 180
    },
    {
      "epoch": 0.34545454545454546,
      "grad_norm": 0.8947266936302185,
      "learning_rate": 1.0857142857142858e-05,
      "loss": 0.0638,
      "step": 190
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 2.172971725463867,
      "learning_rate": 1.1428571428571429e-05,
      "loss": 0.0624,
      "step": 200
    },
    {
      "epoch": 0.38181818181818183,
      "grad_norm": 4.725502967834473,
      "learning_rate": 1.2e-05,
      "loss": 0.0504,
      "step": 210
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3992273807525635,
      "learning_rate": 1.2571428571428572e-05,
      "loss": 0.0417,
      "step": 220
    },
    {
      "epoch": 0.41818181818181815,
      "grad_norm": 0.9457690119743347,
      "learning_rate": 1.3142857142857145e-05,
      "loss": 0.0377,
      "step": 230
    },
    {
      "epoch": 0.43636363636363634,
      "grad_norm": 1.1091430187225342,
      "learning_rate": 1.3714285714285716e-05,
      "loss": 0.0323,
      "step": 240
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 0.7338578701019287,
      "learning_rate": 1.4285714285714287e-05,
      "loss": 0.0269,
      "step": 250
    },
    {
      "epoch": 0.4727272727272727,
      "grad_norm": 1.0748590230941772,
      "learning_rate": 1.4857142857142858e-05,
      "loss": 0.0275,
      "step": 260
    },
    {
      "epoch": 0.4909090909090909,
      "grad_norm": 1.0615975856781006,
      "learning_rate": 1.542857142857143e-05,
      "loss": 0.0214,
      "step": 270
    },
    {
      "epoch": 0.509090909090909,
      "grad_norm": 0.8980767130851746,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.0224,
      "step": 280
    },
    {
      "epoch": 0.5272727272727272,
      "grad_norm": 0.8434118628501892,
      "learning_rate": 1.6571428571428574e-05,
      "loss": 0.0271,
      "step": 290
    },
    {
      "epoch": 0.5454545454545454,
      "grad_norm": 0.7067236304283142,
      "learning_rate": 1.7142857142857142e-05,
      "loss": 0.0225,
      "step": 300
    },
    {
      "epoch": 0.5454545454545454,
      "eval_f1": 0.886688162137264,
      "eval_f2": 0.9429803076320172,
      "eval_loss": 0.01607716828584671,
      "eval_precision": 0.8064516129032258,
      "eval_recall": 0.9846547314578005,
      "eval_runtime": 10.5831,
      "eval_samples_per_second": 738.914,
      "eval_steps_per_second": 11.622,
      "step": 300
    },
    {
      "epoch": 0.5636363636363636,
      "grad_norm": 1.111824631690979,
      "learning_rate": 1.7714285714285717e-05,
      "loss": 0.0186,
      "step": 310
    },
    {
      "epoch": 0.5818181818181818,
      "grad_norm": 1.0324747562408447,
      "learning_rate": 1.8285714285714288e-05,
      "loss": 0.0218,
      "step": 320
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2407127618789673,
      "learning_rate": 1.885714285714286e-05,
      "loss": 0.0223,
      "step": 330
    },
    {
      "epoch": 0.6181818181818182,
      "grad_norm": 0.6641681790351868,
      "learning_rate": 1.942857142857143e-05,
      "loss": 0.022,
      "step": 340
    },
    {
      "epoch": 0.6363636363636364,
      "grad_norm": 0.8924217224121094,
      "learning_rate": 2e-05,
      "loss": 0.0178,
      "step": 350
    },
    {
      "epoch": 0.6545454545454545,
      "grad_norm": 0.49054834246635437,
      "learning_rate": 1.991666666666667e-05,
      "loss": 0.0139,
      "step": 360
    },
    {
      "epoch": 0.6727272727272727,
      "grad_norm": 1.4703093767166138,
      "learning_rate": 1.9833333333333335e-05,
      "loss": 0.0211,
      "step": 370
    },
    {
      "epoch": 0.6909090909090909,
      "grad_norm": 2.7741594314575195,
      "learning_rate": 1.9750000000000002e-05,
      "loss": 0.0122,
      "step": 380
    },
    {
      "epoch": 0.7090909090909091,
      "grad_norm": 1.8091500997543335,
      "learning_rate": 1.9666666666666666e-05,
      "loss": 0.0191,
      "step": 390
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 3.808887243270874,
      "learning_rate": 1.9583333333333333e-05,
      "loss": 0.0168,
      "step": 400
    },
    {
      "epoch": 0.7454545454545455,
      "grad_norm": 0.5149283409118652,
      "learning_rate": 1.95e-05,
      "loss": 0.0145,
      "step": 410
    },
    {
      "epoch": 0.7636363636363637,
      "grad_norm": 0.45617809891700745,
      "learning_rate": 1.9416666666666667e-05,
      "loss": 0.0088,
      "step": 420
    },
    {
      "epoch": 0.7818181818181819,
      "grad_norm": 1.402259111404419,
      "learning_rate": 1.9333333333333333e-05,
      "loss": 0.0126,
      "step": 430
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5392917394638062,
      "learning_rate": 1.925e-05,
      "loss": 0.0138,
      "step": 440
    },
    {
      "epoch": 0.8181818181818182,
      "grad_norm": 2.108272075653076,
      "learning_rate": 1.916666666666667e-05,
      "loss": 0.0165,
      "step": 450
    },
    {
      "epoch": 0.8363636363636363,
      "grad_norm": 0.6225730776786804,
      "learning_rate": 1.9083333333333338e-05,
      "loss": 0.012,
      "step": 460
    },
    {
      "epoch": 0.8545454545454545,
      "grad_norm": 1.8889803886413574,
      "learning_rate": 1.9e-05,
      "loss": 0.0097,
      "step": 470
    },
    {
      "epoch": 0.8727272727272727,
      "grad_norm": 0.1781783550977707,
      "learning_rate": 1.8916666666666668e-05,
      "loss": 0.012,
      "step": 480
    },
    {
      "epoch": 0.8909090909090909,
      "grad_norm": 0.7276476621627808,
      "learning_rate": 1.8833333333333335e-05,
      "loss": 0.0151,
      "step": 490
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 1.302780032157898,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 0.0124,
      "step": 500
    },
    {
      "epoch": 0.9272727272727272,
      "grad_norm": 0.8764067888259888,
      "learning_rate": 1.866666666666667e-05,
      "loss": 0.0127,
      "step": 510
    },
    {
      "epoch": 0.9454545454545454,
      "grad_norm": 0.3675801157951355,
      "learning_rate": 1.8583333333333336e-05,
      "loss": 0.011,
      "step": 520
    },
    {
      "epoch": 0.9636363636363636,
      "grad_norm": 0.413601279258728,
      "learning_rate": 1.8500000000000002e-05,
      "loss": 0.0119,
      "step": 530
    },
    {
      "epoch": 0.9818181818181818,
      "grad_norm": 0.3606299161911011,
      "learning_rate": 1.8416666666666666e-05,
      "loss": 0.0096,
      "step": 540
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.6626041531562805,
      "learning_rate": 1.8333333333333333e-05,
      "loss": 0.0091,
      "step": 550
    },
    {
      "epoch": 1.018181818181818,
      "grad_norm": 0.34816664457321167,
      "learning_rate": 1.825e-05,
      "loss": 0.0064,
      "step": 560
    },
    {
      "epoch": 1.0363636363636364,
      "grad_norm": 0.7120109796524048,
      "learning_rate": 1.8166666666666667e-05,
      "loss": 0.007,
      "step": 570
    },
    {
      "epoch": 1.0545454545454545,
      "grad_norm": 0.34991776943206787,
      "learning_rate": 1.8083333333333334e-05,
      "loss": 0.0156,
      "step": 580
    },
    {
      "epoch": 1.0727272727272728,
      "grad_norm": 0.4325370788574219,
      "learning_rate": 1.8e-05,
      "loss": 0.0116,
      "step": 590
    },
    {
      "epoch": 1.0909090909090908,
      "grad_norm": 1.3302485942840576,
      "learning_rate": 1.7916666666666667e-05,
      "loss": 0.007,
      "step": 600
    },
    {
      "epoch": 1.0909090909090908,
      "eval_f1": 0.9619118745332338,
      "eval_f2": 0.9775349119611415,
      "eval_loss": 0.009453566744923592,
      "eval_precision": 0.9369544131910766,
      "eval_recall": 0.9882352941176471,
      "eval_runtime": 10.7346,
      "eval_samples_per_second": 728.488,
      "eval_steps_per_second": 11.458,
      "step": 600
    },
    {
      "epoch": 1.1090909090909091,
      "grad_norm": 0.5350901484489441,
      "learning_rate": 1.7833333333333334e-05,
      "loss": 0.0038,
      "step": 610
    },
    {
      "epoch": 1.1272727272727272,
      "grad_norm": 0.19123613834381104,
      "learning_rate": 1.775e-05,
      "loss": 0.0075,
      "step": 620
    },
    {
      "epoch": 1.1454545454545455,
      "grad_norm": 0.2627851963043213,
      "learning_rate": 1.7666666666666668e-05,
      "loss": 0.0069,
      "step": 630
    },
    {
      "epoch": 1.1636363636363636,
      "grad_norm": 0.49250972270965576,
      "learning_rate": 1.7583333333333335e-05,
      "loss": 0.009,
      "step": 640
    },
    {
      "epoch": 1.1818181818181819,
      "grad_norm": 1.2556400299072266,
      "learning_rate": 1.7500000000000002e-05,
      "loss": 0.0106,
      "step": 650
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.23302438855171204,
      "learning_rate": 1.741666666666667e-05,
      "loss": 0.006,
      "step": 660
    },
    {
      "epoch": 1.2181818181818183,
      "grad_norm": 0.22926795482635498,
      "learning_rate": 1.7333333333333336e-05,
      "loss": 0.0053,
      "step": 670
    },
    {
      "epoch": 1.2363636363636363,
      "grad_norm": 0.41634848713874817,
      "learning_rate": 1.7250000000000003e-05,
      "loss": 0.0096,
      "step": 680
    },
    {
      "epoch": 1.2545454545454544,
      "grad_norm": 0.7806673049926758,
      "learning_rate": 1.7166666666666666e-05,
      "loss": 0.0077,
      "step": 690
    },
    {
      "epoch": 1.2727272727272727,
      "grad_norm": 0.6627803444862366,
      "learning_rate": 1.7083333333333333e-05,
      "loss": 0.008,
      "step": 700
    },
    {
      "epoch": 1.290909090909091,
      "grad_norm": 0.33546727895736694,
      "learning_rate": 1.7e-05,
      "loss": 0.0074,
      "step": 710
    },
    {
      "epoch": 1.309090909090909,
      "grad_norm": 1.327726125717163,
      "learning_rate": 1.6916666666666667e-05,
      "loss": 0.0044,
      "step": 720
    },
    {
      "epoch": 1.3272727272727272,
      "grad_norm": 0.4449763894081116,
      "learning_rate": 1.6833333333333334e-05,
      "loss": 0.0046,
      "step": 730
    },
    {
      "epoch": 1.3454545454545455,
      "grad_norm": 0.2766354978084564,
      "learning_rate": 1.675e-05,
      "loss": 0.0034,
      "step": 740
    },
    {
      "epoch": 1.3636363636363638,
      "grad_norm": 0.830558180809021,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.0074,
      "step": 750
    },
    {
      "epoch": 1.3818181818181818,
      "grad_norm": 1.0488086938858032,
      "learning_rate": 1.659166666666667e-05,
      "loss": 0.0065,
      "step": 760
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.5093031525611877,
      "learning_rate": 1.6508333333333336e-05,
      "loss": 0.0053,
      "step": 770
    },
    {
      "epoch": 1.4181818181818182,
      "grad_norm": 0.3070843517780304,
      "learning_rate": 1.6425000000000003e-05,
      "loss": 0.0045,
      "step": 780
    },
    {
      "epoch": 1.4363636363636363,
      "grad_norm": 1.093131422996521,
      "learning_rate": 1.634166666666667e-05,
      "loss": 0.0058,
      "step": 790
    },
    {
      "epoch": 1.4545454545454546,
      "grad_norm": 2.2531373500823975,
      "learning_rate": 1.6258333333333333e-05,
      "loss": 0.0065,
      "step": 800
    },
    {
      "epoch": 1.4727272727272727,
      "grad_norm": 0.27250564098358154,
      "learning_rate": 1.6175e-05,
      "loss": 0.005,
      "step": 810
    },
    {
      "epoch": 1.490909090909091,
      "grad_norm": 2.3462181091308594,
      "learning_rate": 1.6091666666666667e-05,
      "loss": 0.0077,
      "step": 820
    },
    {
      "epoch": 1.509090909090909,
      "grad_norm": 0.5783445835113525,
      "learning_rate": 1.6008333333333334e-05,
      "loss": 0.0065,
      "step": 830
    },
    {
      "epoch": 1.5272727272727273,
      "grad_norm": 2.6000328063964844,
      "learning_rate": 1.5925e-05,
      "loss": 0.0073,
      "step": 840
    },
    {
      "epoch": 1.5454545454545454,
      "grad_norm": 0.27279505133628845,
      "learning_rate": 1.5841666666666668e-05,
      "loss": 0.0054,
      "step": 850
    },
    {
      "epoch": 1.5636363636363635,
      "grad_norm": 0.5557974576950073,
      "learning_rate": 1.5758333333333335e-05,
      "loss": 0.0054,
      "step": 860
    },
    {
      "epoch": 1.5818181818181818,
      "grad_norm": 0.4363366365432739,
      "learning_rate": 1.5675e-05,
      "loss": 0.0064,
      "step": 870
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.055520545691251755,
      "learning_rate": 1.559166666666667e-05,
      "loss": 0.0036,
      "step": 880
    },
    {
      "epoch": 1.6181818181818182,
      "grad_norm": 0.31246721744537354,
      "learning_rate": 1.5508333333333335e-05,
      "loss": 0.0035,
      "step": 890
    },
    {
      "epoch": 1.6363636363636362,
      "grad_norm": 0.3083712160587311,
      "learning_rate": 1.5425000000000002e-05,
      "loss": 0.0043,
      "step": 900
    },
    {
      "epoch": 1.6363636363636362,
      "eval_f1": 0.9619188921859545,
      "eval_f2": 0.9814310223029569,
      "eval_loss": 0.007550612557679415,
      "eval_precision": 0.9310674964097654,
      "eval_recall": 0.9948849104859335,
      "eval_runtime": 11.1497,
      "eval_samples_per_second": 701.366,
      "eval_steps_per_second": 11.032,
      "step": 900
    },
    {
      "epoch": 1.6545454545454545,
      "grad_norm": 1.0350213050842285,
      "learning_rate": 1.534166666666667e-05,
      "loss": 0.0062,
      "step": 910
    },
    {
      "epoch": 1.6727272727272728,
      "grad_norm": 1.7585707902908325,
      "learning_rate": 1.5258333333333334e-05,
      "loss": 0.006,
      "step": 920
    },
    {
      "epoch": 1.690909090909091,
      "grad_norm": 0.35897427797317505,
      "learning_rate": 1.5175000000000001e-05,
      "loss": 0.0041,
      "step": 930
    },
    {
      "epoch": 1.709090909090909,
      "grad_norm": 0.2970544993877411,
      "learning_rate": 1.5091666666666668e-05,
      "loss": 0.0058,
      "step": 940
    },
    {
      "epoch": 1.7272727272727273,
      "grad_norm": 0.10311456769704819,
      "learning_rate": 1.5008333333333333e-05,
      "loss": 0.0057,
      "step": 950
    },
    {
      "epoch": 1.7454545454545456,
      "grad_norm": 1.624154806137085,
      "learning_rate": 1.4925e-05,
      "loss": 0.0088,
      "step": 960
    },
    {
      "epoch": 1.7636363636363637,
      "grad_norm": 0.22448480129241943,
      "learning_rate": 1.4841666666666667e-05,
      "loss": 0.0038,
      "step": 970
    },
    {
      "epoch": 1.7818181818181817,
      "grad_norm": 0.9474364519119263,
      "learning_rate": 1.4758333333333334e-05,
      "loss": 0.0044,
      "step": 980
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.05209196358919144,
      "learning_rate": 1.4675000000000001e-05,
      "loss": 0.0064,
      "step": 990
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.3231663405895233,
      "learning_rate": 1.4591666666666668e-05,
      "loss": 0.005,
      "step": 1000
    },
    {
      "epoch": 1.8363636363636364,
      "grad_norm": 0.4507773220539093,
      "learning_rate": 1.4508333333333335e-05,
      "loss": 0.0047,
      "step": 1010
    },
    {
      "epoch": 1.8545454545454545,
      "grad_norm": 0.28643473982810974,
      "learning_rate": 1.4425e-05,
      "loss": 0.006,
      "step": 1020
    },
    {
      "epoch": 1.8727272727272726,
      "grad_norm": 0.30528539419174194,
      "learning_rate": 1.4341666666666667e-05,
      "loss": 0.0035,
      "step": 1030
    },
    {
      "epoch": 1.8909090909090909,
      "grad_norm": 0.7955114245414734,
      "learning_rate": 1.4258333333333334e-05,
      "loss": 0.0049,
      "step": 1040
    },
    {
      "epoch": 1.9090909090909092,
      "grad_norm": 0.6773081421852112,
      "learning_rate": 1.4175e-05,
      "loss": 0.0058,
      "step": 1050
    },
    {
      "epoch": 1.9272727272727272,
      "grad_norm": 0.5149025917053223,
      "learning_rate": 1.4091666666666668e-05,
      "loss": 0.0036,
      "step": 1060
    },
    {
      "epoch": 1.9454545454545453,
      "grad_norm": 0.5025485754013062,
      "learning_rate": 1.4008333333333334e-05,
      "loss": 0.0073,
      "step": 1070
    },
    {
      "epoch": 1.9636363636363636,
      "grad_norm": 0.6183115839958191,
      "learning_rate": 1.3925000000000001e-05,
      "loss": 0.0038,
      "step": 1080
    },
    {
      "epoch": 1.981818181818182,
      "grad_norm": 0.33286458253860474,
      "learning_rate": 1.3841666666666668e-05,
      "loss": 0.0047,
      "step": 1090
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.766334056854248,
      "learning_rate": 1.3758333333333333e-05,
      "loss": 0.0083,
      "step": 1100
    },
    {
      "epoch": 2.018181818181818,
      "grad_norm": 0.23794743418693542,
      "learning_rate": 1.3675e-05,
      "loss": 0.0027,
      "step": 1110
    },
    {
      "epoch": 2.036363636363636,
      "grad_norm": 0.32537227869033813,
      "learning_rate": 1.3591666666666667e-05,
      "loss": 0.0019,
      "step": 1120
    },
    {
      "epoch": 2.0545454545454547,
      "grad_norm": 0.3572939932346344,
      "learning_rate": 1.3508333333333334e-05,
      "loss": 0.0023,
      "step": 1130
    },
    {
      "epoch": 2.0727272727272728,
      "grad_norm": 0.2717416286468506,
      "learning_rate": 1.3425000000000001e-05,
      "loss": 0.001,
      "step": 1140
    },
    {
      "epoch": 2.090909090909091,
      "grad_norm": 0.20052389800548553,
      "learning_rate": 1.3341666666666668e-05,
      "loss": 0.0017,
      "step": 1150
    },
    {
      "epoch": 2.109090909090909,
      "grad_norm": 0.1603120118379593,
      "learning_rate": 1.3258333333333335e-05,
      "loss": 0.0031,
      "step": 1160
    },
    {
      "epoch": 2.1272727272727274,
      "grad_norm": 0.15461675822734833,
      "learning_rate": 1.3175e-05,
      "loss": 0.0021,
      "step": 1170
    },
    {
      "epoch": 2.1454545454545455,
      "grad_norm": 0.7481666803359985,
      "learning_rate": 1.3091666666666667e-05,
      "loss": 0.0033,
      "step": 1180
    },
    {
      "epoch": 2.1636363636363636,
      "grad_norm": 0.3837297558784485,
      "learning_rate": 1.3008333333333334e-05,
      "loss": 0.0018,
      "step": 1190
    },
    {
      "epoch": 2.1818181818181817,
      "grad_norm": 0.09467964619398117,
      "learning_rate": 1.2925e-05,
      "loss": 0.0017,
      "step": 1200
    },
    {
      "epoch": 2.1818181818181817,
      "eval_f1": 0.9751068644707066,
      "eval_f2": 0.9850640113798008,
      "eval_loss": 0.009348779916763306,
      "eval_precision": 0.9589515331355094,
      "eval_recall": 0.9918158567774936,
      "eval_runtime": 10.6749,
      "eval_samples_per_second": 732.56,
      "eval_steps_per_second": 11.522,
      "step": 1200
    },
    {
      "epoch": 2.2,
      "grad_norm": 0.2888661324977875,
      "learning_rate": 1.2841666666666668e-05,
      "loss": 0.0017,
      "step": 1210
    },
    {
      "epoch": 2.2181818181818183,
      "grad_norm": 1.4225064516067505,
      "learning_rate": 1.2758333333333335e-05,
      "loss": 0.0017,
      "step": 1220
    },
    {
      "epoch": 2.2363636363636363,
      "grad_norm": 0.5475151538848877,
      "learning_rate": 1.2675000000000001e-05,
      "loss": 0.0022,
      "step": 1230
    },
    {
      "epoch": 2.2545454545454544,
      "grad_norm": 0.2563498914241791,
      "learning_rate": 1.2591666666666668e-05,
      "loss": 0.0026,
      "step": 1240
    },
    {
      "epoch": 2.2727272727272725,
      "grad_norm": 0.09335105866193771,
      "learning_rate": 1.2508333333333334e-05,
      "loss": 0.0013,
      "step": 1250
    },
    {
      "epoch": 2.290909090909091,
      "grad_norm": 0.08890422433614731,
      "learning_rate": 1.2425e-05,
      "loss": 0.0015,
      "step": 1260
    },
    {
      "epoch": 2.309090909090909,
      "grad_norm": 0.0670776441693306,
      "learning_rate": 1.2341666666666667e-05,
      "loss": 0.0015,
      "step": 1270
    },
    {
      "epoch": 2.327272727272727,
      "grad_norm": 0.09385448694229126,
      "learning_rate": 1.2258333333333334e-05,
      "loss": 0.0022,
      "step": 1280
    },
    {
      "epoch": 2.3454545454545457,
      "grad_norm": 0.31550052762031555,
      "learning_rate": 1.2175000000000001e-05,
      "loss": 0.0082,
      "step": 1290
    },
    {
      "epoch": 2.3636363636363638,
      "grad_norm": 0.14805318415164948,
      "learning_rate": 1.2091666666666668e-05,
      "loss": 0.003,
      "step": 1300
    },
    {
      "epoch": 2.381818181818182,
      "grad_norm": 0.5259885787963867,
      "learning_rate": 1.2008333333333335e-05,
      "loss": 0.0038,
      "step": 1310
    },
    {
      "epoch": 2.4,
      "grad_norm": 0.37176281213760376,
      "learning_rate": 1.1925e-05,
      "loss": 0.0017,
      "step": 1320
    },
    {
      "epoch": 2.418181818181818,
      "grad_norm": 0.33867013454437256,
      "learning_rate": 1.1841666666666667e-05,
      "loss": 0.0026,
      "step": 1330
    },
    {
      "epoch": 2.4363636363636365,
      "grad_norm": 0.04352513328194618,
      "learning_rate": 1.1758333333333334e-05,
      "loss": 0.002,
      "step": 1340
    },
    {
      "epoch": 2.4545454545454546,
      "grad_norm": 0.35843801498413086,
      "learning_rate": 1.1675000000000001e-05,
      "loss": 0.0028,
      "step": 1350
    },
    {
      "epoch": 2.4727272727272727,
      "grad_norm": 0.5411182045936584,
      "learning_rate": 1.1591666666666668e-05,
      "loss": 0.0019,
      "step": 1360
    },
    {
      "epoch": 2.4909090909090907,
      "grad_norm": 0.08427491784095764,
      "learning_rate": 1.1508333333333335e-05,
      "loss": 0.0017,
      "step": 1370
    },
    {
      "epoch": 2.509090909090909,
      "grad_norm": 0.27736711502075195,
      "learning_rate": 1.1425000000000002e-05,
      "loss": 0.0023,
      "step": 1380
    },
    {
      "epoch": 2.5272727272727273,
      "grad_norm": 0.06100330501794815,
      "learning_rate": 1.1341666666666668e-05,
      "loss": 0.0017,
      "step": 1390
    },
    {
      "epoch": 2.5454545454545454,
      "grad_norm": 0.07736339420080185,
      "learning_rate": 1.1258333333333334e-05,
      "loss": 0.0038,
      "step": 1400
    },
    {
      "epoch": 2.5636363636363635,
      "grad_norm": 0.15126390755176544,
      "learning_rate": 1.1175e-05,
      "loss": 0.0009,
      "step": 1410
    },
    {
      "epoch": 2.581818181818182,
      "grad_norm": 0.0258785467594862,
      "learning_rate": 1.1091666666666667e-05,
      "loss": 0.001,
      "step": 1420
    },
    {
      "epoch": 2.6,
      "grad_norm": 0.8827760815620422,
      "learning_rate": 1.1008333333333334e-05,
      "loss": 0.003,
      "step": 1430
    },
    {
      "epoch": 2.618181818181818,
      "grad_norm": 0.14176161587238312,
      "learning_rate": 1.0925000000000001e-05,
      "loss": 0.0023,
      "step": 1440
    },
    {
      "epoch": 2.6363636363636362,
      "grad_norm": 0.17485152184963226,
      "learning_rate": 1.0841666666666668e-05,
      "loss": 0.0023,
      "step": 1450
    },
    {
      "epoch": 2.6545454545454543,
      "grad_norm": 0.1624346673488617,
      "learning_rate": 1.0758333333333335e-05,
      "loss": 0.0016,
      "step": 1460
    },
    {
      "epoch": 2.672727272727273,
      "grad_norm": 0.32750600576400757,
      "learning_rate": 1.0675e-05,
      "loss": 0.0018,
      "step": 1470
    },
    {
      "epoch": 2.690909090909091,
      "grad_norm": 0.3369393050670624,
      "learning_rate": 1.0591666666666667e-05,
      "loss": 0.0047,
      "step": 1480
    },
    {
      "epoch": 2.709090909090909,
      "grad_norm": 0.19913850724697113,
      "learning_rate": 1.0508333333333334e-05,
      "loss": 0.0029,
      "step": 1490
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 0.6039556264877319,
      "learning_rate": 1.0425000000000001e-05,
      "loss": 0.0029,
      "step": 1500
    },
    {
      "epoch": 2.7272727272727275,
      "eval_f1": 0.9707426856714179,
      "eval_f2": 0.9838807785888077,
      "eval_loss": 0.008097349666059017,
      "eval_precision": 0.9496086105675147,
      "eval_recall": 0.992838874680307,
      "eval_runtime": 10.7985,
      "eval_samples_per_second": 724.178,
      "eval_steps_per_second": 11.391,
      "step": 1500
    },
    {
      "epoch": 2.7454545454545456,
      "grad_norm": 0.15625803172588348,
      "learning_rate": 1.0341666666666668e-05,
      "loss": 0.001,
      "step": 1510
    },
    {
      "epoch": 2.7636363636363637,
      "grad_norm": 0.5355175733566284,
      "learning_rate": 1.0258333333333335e-05,
      "loss": 0.0015,
      "step": 1520
    },
    {
      "epoch": 2.7818181818181817,
      "grad_norm": 0.054884154349565506,
      "learning_rate": 1.0175000000000002e-05,
      "loss": 0.0043,
      "step": 1530
    },
    {
      "epoch": 2.8,
      "grad_norm": 0.14347773790359497,
      "learning_rate": 1.0091666666666669e-05,
      "loss": 0.0012,
      "step": 1540
    },
    {
      "epoch": 2.8181818181818183,
      "grad_norm": 0.09393730759620667,
      "learning_rate": 1.0008333333333334e-05,
      "loss": 0.0026,
      "step": 1550
    },
    {
      "epoch": 2.8363636363636364,
      "grad_norm": 0.2671602964401245,
      "learning_rate": 9.925e-06,
      "loss": 0.0006,
      "step": 1560
    },
    {
      "epoch": 2.8545454545454545,
      "grad_norm": 0.04782993346452713,
      "learning_rate": 9.841666666666668e-06,
      "loss": 0.0011,
      "step": 1570
    },
    {
      "epoch": 2.8727272727272726,
      "grad_norm": 0.5545538067817688,
      "learning_rate": 9.758333333333334e-06,
      "loss": 0.0034,
      "step": 1580
    },
    {
      "epoch": 2.8909090909090907,
      "grad_norm": 0.18771076202392578,
      "learning_rate": 9.675000000000001e-06,
      "loss": 0.0014,
      "step": 1590
    },
    {
      "epoch": 2.909090909090909,
      "grad_norm": 0.022369615733623505,
      "learning_rate": 9.591666666666667e-06,
      "loss": 0.0007,
      "step": 1600
    },
    {
      "epoch": 2.9272727272727272,
      "grad_norm": 0.569296658039093,
      "learning_rate": 9.508333333333333e-06,
      "loss": 0.0016,
      "step": 1610
    },
    {
      "epoch": 2.9454545454545453,
      "grad_norm": 0.07517626136541367,
      "learning_rate": 9.425e-06,
      "loss": 0.0012,
      "step": 1620
    },
    {
      "epoch": 2.963636363636364,
      "grad_norm": 0.4265158772468567,
      "learning_rate": 9.341666666666667e-06,
      "loss": 0.0013,
      "step": 1630
    },
    {
      "epoch": 2.981818181818182,
      "grad_norm": 0.31167715787887573,
      "learning_rate": 9.258333333333334e-06,
      "loss": 0.0014,
      "step": 1640
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.6325229406356812,
      "learning_rate": 9.175000000000001e-06,
      "loss": 0.0007,
      "step": 1650
    },
    {
      "epoch": 3.018181818181818,
      "grad_norm": 0.04929906874895096,
      "learning_rate": 9.091666666666668e-06,
      "loss": 0.0006,
      "step": 1660
    },
    {
      "epoch": 3.036363636363636,
      "grad_norm": 0.22075557708740234,
      "learning_rate": 9.008333333333335e-06,
      "loss": 0.0006,
      "step": 1670
    },
    {
      "epoch": 3.0545454545454547,
      "grad_norm": 0.2008703649044037,
      "learning_rate": 8.925e-06,
      "loss": 0.0018,
      "step": 1680
    },
    {
      "epoch": 3.0727272727272728,
      "grad_norm": 0.15318256616592407,
      "learning_rate": 8.841666666666667e-06,
      "loss": 0.0011,
      "step": 1690
    },
    {
      "epoch": 3.090909090909091,
      "grad_norm": 0.19851188361644745,
      "learning_rate": 8.758333333333334e-06,
      "loss": 0.0005,
      "step": 1700
    },
    {
      "epoch": 3.109090909090909,
      "grad_norm": 0.02609218843281269,
      "learning_rate": 8.675e-06,
      "loss": 0.0014,
      "step": 1710
    },
    {
      "epoch": 3.1272727272727274,
      "grad_norm": 0.02781720645725727,
      "learning_rate": 8.591666666666668e-06,
      "loss": 0.0004,
      "step": 1720
    },
    {
      "epoch": 3.1454545454545455,
      "grad_norm": 0.17195935547351837,
      "learning_rate": 8.508333333333335e-06,
      "loss": 0.0011,
      "step": 1730
    },
    {
      "epoch": 3.1636363636363636,
      "grad_norm": 0.04604584723711014,
      "learning_rate": 8.425000000000001e-06,
      "loss": 0.0017,
      "step": 1740
    },
    {
      "epoch": 3.1818181818181817,
      "grad_norm": 0.01334014069288969,
      "learning_rate": 8.341666666666667e-06,
      "loss": 0.0005,
      "step": 1750
    },
    {
      "epoch": 3.2,
      "grad_norm": 0.10181070119142532,
      "learning_rate": 8.258333333333334e-06,
      "loss": 0.0003,
      "step": 1760
    },
    {
      "epoch": 3.2181818181818183,
      "grad_norm": 0.029040852561593056,
      "learning_rate": 8.175e-06,
      "loss": 0.0002,
      "step": 1770
    },
    {
      "epoch": 3.2363636363636363,
      "grad_norm": 1.0948010683059692,
      "learning_rate": 8.091666666666667e-06,
      "loss": 0.0006,
      "step": 1780
    },
    {
      "epoch": 3.2545454545454544,
      "grad_norm": 0.19002945721149445,
      "learning_rate": 8.008333333333334e-06,
      "loss": 0.0008,
      "step": 1790
    },
    {
      "epoch": 3.2727272727272725,
      "grad_norm": 0.02836296707391739,
      "learning_rate": 7.925000000000001e-06,
      "loss": 0.0006,
      "step": 1800
    },
    {
      "epoch": 3.2727272727272725,
      "eval_f1": 0.9827235772357723,
      "eval_f2": 0.9866340169370472,
      "eval_loss": 0.0109314676374197,
      "eval_precision": 0.9762746087834427,
      "eval_recall": 0.9892583120204603,
      "eval_runtime": 10.4766,
      "eval_samples_per_second": 746.427,
      "eval_steps_per_second": 11.74,
      "step": 1800
    },
    {
      "epoch": 3.290909090909091,
      "grad_norm": 0.018972614780068398,
      "learning_rate": 7.841666666666668e-06,
      "loss": 0.001,
      "step": 1810
    },
    {
      "epoch": 3.309090909090909,
      "grad_norm": 0.003141665132716298,
      "learning_rate": 7.758333333333335e-06,
      "loss": 0.001,
      "step": 1820
    },
    {
      "epoch": 3.327272727272727,
      "grad_norm": 0.029703687876462936,
      "learning_rate": 7.675e-06,
      "loss": 0.0007,
      "step": 1830
    },
    {
      "epoch": 3.3454545454545457,
      "grad_norm": 0.18382185697555542,
      "learning_rate": 7.591666666666667e-06,
      "loss": 0.0004,
      "step": 1840
    },
    {
      "epoch": 3.3636363636363638,
      "grad_norm": 0.05236556753516197,
      "learning_rate": 7.508333333333334e-06,
      "loss": 0.002,
      "step": 1850
    },
    {
      "epoch": 3.381818181818182,
      "grad_norm": 0.17387185990810394,
      "learning_rate": 7.425000000000001e-06,
      "loss": 0.0009,
      "step": 1860
    },
    {
      "epoch": 3.4,
      "grad_norm": 0.008212663233280182,
      "learning_rate": 7.341666666666667e-06,
      "loss": 0.0007,
      "step": 1870
    },
    {
      "epoch": 3.418181818181818,
      "grad_norm": 0.22597701847553253,
      "learning_rate": 7.258333333333334e-06,
      "loss": 0.001,
      "step": 1880
    },
    {
      "epoch": 3.4363636363636365,
      "grad_norm": 0.07276669144630432,
      "learning_rate": 7.175000000000001e-06,
      "loss": 0.0017,
      "step": 1890
    },
    {
      "epoch": 3.4545454545454546,
      "grad_norm": 0.29078298807144165,
      "learning_rate": 7.091666666666667e-06,
      "loss": 0.0004,
      "step": 1900
    },
    {
      "epoch": 3.4727272727272727,
      "grad_norm": 0.11019200086593628,
      "learning_rate": 7.008333333333334e-06,
      "loss": 0.0005,
      "step": 1910
    },
    {
      "epoch": 3.4909090909090907,
      "grad_norm": 0.017450423911213875,
      "learning_rate": 6.925000000000001e-06,
      "loss": 0.0003,
      "step": 1920
    },
    {
      "epoch": 3.509090909090909,
      "grad_norm": 0.023930951952934265,
      "learning_rate": 6.8416666666666675e-06,
      "loss": 0.0013,
      "step": 1930
    },
    {
      "epoch": 3.5272727272727273,
      "grad_norm": 0.1692740023136139,
      "learning_rate": 6.7583333333333336e-06,
      "loss": 0.0033,
      "step": 1940
    },
    {
      "epoch": 3.5454545454545454,
      "grad_norm": 0.031825270503759384,
      "learning_rate": 6.6750000000000005e-06,
      "loss": 0.0008,
      "step": 1950
    },
    {
      "epoch": 3.5636363636363635,
      "grad_norm": 0.004583127796649933,
      "learning_rate": 6.591666666666667e-06,
      "loss": 0.0005,
      "step": 1960
    },
    {
      "epoch": 3.581818181818182,
      "grad_norm": 0.19434763491153717,
      "learning_rate": 6.508333333333334e-06,
      "loss": 0.0003,
      "step": 1970
    },
    {
      "epoch": 3.6,
      "grad_norm": 0.007167825475335121,
      "learning_rate": 6.425e-06,
      "loss": 0.0002,
      "step": 1980
    },
    {
      "epoch": 3.618181818181818,
      "grad_norm": 0.24422968924045563,
      "learning_rate": 6.341666666666667e-06,
      "loss": 0.0003,
      "step": 1990
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 0.00559116480872035,
      "learning_rate": 6.258333333333334e-06,
      "loss": 0.0001,
      "step": 2000
    },
    {
      "epoch": 3.6545454545454543,
      "grad_norm": 0.3058757185935974,
      "learning_rate": 6.175000000000001e-06,
      "loss": 0.0015,
      "step": 2010
    },
    {
      "epoch": 3.672727272727273,
      "grad_norm": 0.008120411075651646,
      "learning_rate": 6.091666666666667e-06,
      "loss": 0.0008,
      "step": 2020
    },
    {
      "epoch": 3.690909090909091,
      "grad_norm": 0.007178381085395813,
      "learning_rate": 6.008333333333334e-06,
      "loss": 0.0003,
      "step": 2030
    },
    {
      "epoch": 3.709090909090909,
      "grad_norm": 0.12139607220888138,
      "learning_rate": 5.925000000000001e-06,
      "loss": 0.0001,
      "step": 2040
    },
    {
      "epoch": 3.7272727272727275,
      "grad_norm": 0.16555677354335785,
      "learning_rate": 5.841666666666667e-06,
      "loss": 0.0004,
      "step": 2050
    },
    {
      "epoch": 3.7454545454545456,
      "grad_norm": 0.08208701014518738,
      "learning_rate": 5.758333333333334e-06,
      "loss": 0.0002,
      "step": 2060
    },
    {
      "epoch": 3.7636363636363637,
      "grad_norm": 0.0696110725402832,
      "learning_rate": 5.675000000000001e-06,
      "loss": 0.0008,
      "step": 2070
    },
    {
      "epoch": 3.7818181818181817,
      "grad_norm": 0.019171856343746185,
      "learning_rate": 5.591666666666668e-06,
      "loss": 0.0009,
      "step": 2080
    },
    {
      "epoch": 3.8,
      "grad_norm": 0.011577253229916096,
      "learning_rate": 5.508333333333334e-06,
      "loss": 0.0007,
      "step": 2090
    },
    {
      "epoch": 3.8181818181818183,
      "grad_norm": 0.1947954148054123,
      "learning_rate": 5.4250000000000006e-06,
      "loss": 0.0005,
      "step": 2100
    },
    {
      "epoch": 3.8181818181818183,
      "eval_f1": 0.9789500380420999,
      "eval_f2": 0.9838907014681892,
      "eval_loss": 0.012196212075650692,
      "eval_precision": 0.9708249496981891,
      "eval_recall": 0.9872122762148338,
      "eval_runtime": 10.6686,
      "eval_samples_per_second": 732.992,
      "eval_steps_per_second": 11.529,
      "step": 2100
    }
  ],
  "logging_steps": 10,
  "max_steps": 2750,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 300,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 1
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 7.07084650174464e+16,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}