{ "best_global_step": 1800, "best_metric": 0.9762746087834427, "best_model_checkpoint": "/workspace/AI/Trend_Primus-FineWeb_Filtering-pipeline/securebert_finetuned/defensive_vs_rest/checkpoint-1800", "epoch": 3.8181818181818183, "eval_steps": 300, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01818181818181818, "grad_norm": 0.4759802222251892, "learning_rate": 5.714285714285715e-07, "loss": 0.1542, "step": 10 }, { "epoch": 0.03636363636363636, "grad_norm": 0.3740444481372833, "learning_rate": 1.142857142857143e-06, "loss": 0.1623, "step": 20 }, { "epoch": 0.05454545454545454, "grad_norm": 0.5404930710792542, "learning_rate": 1.7142857142857145e-06, "loss": 0.1582, "step": 30 }, { "epoch": 0.07272727272727272, "grad_norm": 0.3705693781375885, "learning_rate": 2.285714285714286e-06, "loss": 0.1422, "step": 40 }, { "epoch": 0.09090909090909091, "grad_norm": 0.4376075565814972, "learning_rate": 2.8571428571428573e-06, "loss": 0.1403, "step": 50 }, { "epoch": 0.10909090909090909, "grad_norm": 0.46200892329216003, "learning_rate": 3.428571428571429e-06, "loss": 0.142, "step": 60 }, { "epoch": 0.12727272727272726, "grad_norm": 0.2855919599533081, "learning_rate": 4.000000000000001e-06, "loss": 0.144, "step": 70 }, { "epoch": 0.14545454545454545, "grad_norm": 0.33721357583999634, "learning_rate": 4.571428571428572e-06, "loss": 0.1401, "step": 80 }, { "epoch": 0.16363636363636364, "grad_norm": 0.6751205921173096, "learning_rate": 5.142857142857142e-06, "loss": 0.132, "step": 90 }, { "epoch": 0.18181818181818182, "grad_norm": 1.1521556377410889, "learning_rate": 5.7142857142857145e-06, "loss": 0.1277, "step": 100 }, { "epoch": 0.2, "grad_norm": 0.6443154811859131, "learning_rate": 6.285714285714286e-06, "loss": 0.1079, "step": 110 }, { "epoch": 0.21818181818181817, "grad_norm": 0.6698077321052551, "learning_rate": 6.857142857142858e-06, "loss": 0.1066, "step": 120 }, { "epoch": 0.23636363636363636, "grad_norm": 0.8653299808502197, "learning_rate": 7.428571428571429e-06, "loss": 0.0943, "step": 130 }, { "epoch": 0.2545454545454545, "grad_norm": 1.1476327180862427, "learning_rate": 8.000000000000001e-06, "loss": 0.0924, "step": 140 }, { "epoch": 0.2727272727272727, "grad_norm": 0.5096330642700195, "learning_rate": 8.571428571428571e-06, "loss": 0.0844, "step": 150 }, { "epoch": 0.2909090909090909, "grad_norm": 1.1907318830490112, "learning_rate": 9.142857142857144e-06, "loss": 0.0813, "step": 160 }, { "epoch": 0.3090909090909091, "grad_norm": 1.299401879310608, "learning_rate": 9.714285714285715e-06, "loss": 0.0749, "step": 170 }, { "epoch": 0.32727272727272727, "grad_norm": 0.8548530340194702, "learning_rate": 1.0285714285714285e-05, "loss": 0.0661, "step": 180 }, { "epoch": 0.34545454545454546, "grad_norm": 0.8947266936302185, "learning_rate": 1.0857142857142858e-05, "loss": 0.0638, "step": 190 }, { "epoch": 0.36363636363636365, "grad_norm": 2.172971725463867, "learning_rate": 1.1428571428571429e-05, "loss": 0.0624, "step": 200 }, { "epoch": 0.38181818181818183, "grad_norm": 4.725502967834473, "learning_rate": 1.2e-05, "loss": 0.0504, "step": 210 }, { "epoch": 0.4, "grad_norm": 1.3992273807525635, "learning_rate": 1.2571428571428572e-05, "loss": 0.0417, "step": 220 }, { "epoch": 0.41818181818181815, "grad_norm": 0.9457690119743347, "learning_rate": 1.3142857142857145e-05, "loss": 0.0377, "step": 230 }, { "epoch": 0.43636363636363634, "grad_norm": 1.1091430187225342, "learning_rate": 1.3714285714285716e-05, "loss": 0.0323, "step": 240 }, { "epoch": 0.45454545454545453, "grad_norm": 0.7338578701019287, "learning_rate": 1.4285714285714287e-05, "loss": 0.0269, "step": 250 }, { "epoch": 0.4727272727272727, "grad_norm": 1.0748590230941772, "learning_rate": 1.4857142857142858e-05, "loss": 0.0275, "step": 260 }, { "epoch": 0.4909090909090909, "grad_norm": 1.0615975856781006, "learning_rate": 1.542857142857143e-05, "loss": 0.0214, "step": 270 }, { "epoch": 0.509090909090909, "grad_norm": 0.8980767130851746, "learning_rate": 1.6000000000000003e-05, "loss": 0.0224, "step": 280 }, { "epoch": 0.5272727272727272, "grad_norm": 0.8434118628501892, "learning_rate": 1.6571428571428574e-05, "loss": 0.0271, "step": 290 }, { "epoch": 0.5454545454545454, "grad_norm": 0.7067236304283142, "learning_rate": 1.7142857142857142e-05, "loss": 0.0225, "step": 300 }, { "epoch": 0.5454545454545454, "eval_f1": 0.886688162137264, "eval_f2": 0.9429803076320172, "eval_loss": 0.01607716828584671, "eval_precision": 0.8064516129032258, "eval_recall": 0.9846547314578005, "eval_runtime": 10.5831, "eval_samples_per_second": 738.914, "eval_steps_per_second": 11.622, "step": 300 }, { "epoch": 0.5636363636363636, "grad_norm": 1.111824631690979, "learning_rate": 1.7714285714285717e-05, "loss": 0.0186, "step": 310 }, { "epoch": 0.5818181818181818, "grad_norm": 1.0324747562408447, "learning_rate": 1.8285714285714288e-05, "loss": 0.0218, "step": 320 }, { "epoch": 0.6, "grad_norm": 1.2407127618789673, "learning_rate": 1.885714285714286e-05, "loss": 0.0223, "step": 330 }, { "epoch": 0.6181818181818182, "grad_norm": 0.6641681790351868, "learning_rate": 1.942857142857143e-05, "loss": 0.022, "step": 340 }, { "epoch": 0.6363636363636364, "grad_norm": 0.8924217224121094, "learning_rate": 2e-05, "loss": 0.0178, "step": 350 }, { "epoch": 0.6545454545454545, "grad_norm": 0.49054834246635437, "learning_rate": 1.991666666666667e-05, "loss": 0.0139, "step": 360 }, { "epoch": 0.6727272727272727, "grad_norm": 1.4703093767166138, "learning_rate": 1.9833333333333335e-05, "loss": 0.0211, "step": 370 }, { "epoch": 0.6909090909090909, "grad_norm": 2.7741594314575195, "learning_rate": 1.9750000000000002e-05, "loss": 0.0122, "step": 380 }, { "epoch": 0.7090909090909091, "grad_norm": 1.8091500997543335, "learning_rate": 1.9666666666666666e-05, "loss": 0.0191, "step": 390 }, { "epoch": 0.7272727272727273, "grad_norm": 3.808887243270874, "learning_rate": 1.9583333333333333e-05, "loss": 0.0168, "step": 400 }, { "epoch": 0.7454545454545455, "grad_norm": 0.5149283409118652, "learning_rate": 1.95e-05, "loss": 0.0145, "step": 410 }, { "epoch": 0.7636363636363637, "grad_norm": 0.45617809891700745, "learning_rate": 1.9416666666666667e-05, "loss": 0.0088, "step": 420 }, { "epoch": 0.7818181818181819, "grad_norm": 1.402259111404419, "learning_rate": 1.9333333333333333e-05, "loss": 0.0126, "step": 430 }, { "epoch": 0.8, "grad_norm": 1.5392917394638062, "learning_rate": 1.925e-05, "loss": 0.0138, "step": 440 }, { "epoch": 0.8181818181818182, "grad_norm": 2.108272075653076, "learning_rate": 1.916666666666667e-05, "loss": 0.0165, "step": 450 }, { "epoch": 0.8363636363636363, "grad_norm": 0.6225730776786804, "learning_rate": 1.9083333333333338e-05, "loss": 0.012, "step": 460 }, { "epoch": 0.8545454545454545, "grad_norm": 1.8889803886413574, "learning_rate": 1.9e-05, "loss": 0.0097, "step": 470 }, { "epoch": 0.8727272727272727, "grad_norm": 0.1781783550977707, "learning_rate": 1.8916666666666668e-05, "loss": 0.012, "step": 480 }, { "epoch": 0.8909090909090909, "grad_norm": 0.7276476621627808, "learning_rate": 1.8833333333333335e-05, "loss": 0.0151, "step": 490 }, { "epoch": 0.9090909090909091, "grad_norm": 1.302780032157898, "learning_rate": 1.8750000000000002e-05, "loss": 0.0124, "step": 500 }, { "epoch": 0.9272727272727272, "grad_norm": 0.8764067888259888, "learning_rate": 1.866666666666667e-05, "loss": 0.0127, "step": 510 }, { "epoch": 0.9454545454545454, "grad_norm": 0.3675801157951355, "learning_rate": 1.8583333333333336e-05, "loss": 0.011, "step": 520 }, { "epoch": 0.9636363636363636, "grad_norm": 0.413601279258728, "learning_rate": 1.8500000000000002e-05, "loss": 0.0119, "step": 530 }, { "epoch": 0.9818181818181818, "grad_norm": 0.3606299161911011, "learning_rate": 1.8416666666666666e-05, "loss": 0.0096, "step": 540 }, { "epoch": 1.0, "grad_norm": 0.6626041531562805, "learning_rate": 1.8333333333333333e-05, "loss": 0.0091, "step": 550 }, { "epoch": 1.018181818181818, "grad_norm": 0.34816664457321167, "learning_rate": 1.825e-05, "loss": 0.0064, "step": 560 }, { "epoch": 1.0363636363636364, "grad_norm": 0.7120109796524048, "learning_rate": 1.8166666666666667e-05, "loss": 0.007, "step": 570 }, { "epoch": 1.0545454545454545, "grad_norm": 0.34991776943206787, "learning_rate": 1.8083333333333334e-05, "loss": 0.0156, "step": 580 }, { "epoch": 1.0727272727272728, "grad_norm": 0.4325370788574219, "learning_rate": 1.8e-05, "loss": 0.0116, "step": 590 }, { "epoch": 1.0909090909090908, "grad_norm": 1.3302485942840576, "learning_rate": 1.7916666666666667e-05, "loss": 0.007, "step": 600 }, { "epoch": 1.0909090909090908, "eval_f1": 0.9619118745332338, "eval_f2": 0.9775349119611415, "eval_loss": 0.009453566744923592, "eval_precision": 0.9369544131910766, "eval_recall": 0.9882352941176471, "eval_runtime": 10.7346, "eval_samples_per_second": 728.488, "eval_steps_per_second": 11.458, "step": 600 }, { "epoch": 1.1090909090909091, "grad_norm": 0.5350901484489441, "learning_rate": 1.7833333333333334e-05, "loss": 0.0038, "step": 610 }, { "epoch": 1.1272727272727272, "grad_norm": 0.19123613834381104, "learning_rate": 1.775e-05, "loss": 0.0075, "step": 620 }, { "epoch": 1.1454545454545455, "grad_norm": 0.2627851963043213, "learning_rate": 1.7666666666666668e-05, "loss": 0.0069, "step": 630 }, { "epoch": 1.1636363636363636, "grad_norm": 0.49250972270965576, "learning_rate": 1.7583333333333335e-05, "loss": 0.009, "step": 640 }, { "epoch": 1.1818181818181819, "grad_norm": 1.2556400299072266, "learning_rate": 1.7500000000000002e-05, "loss": 0.0106, "step": 650 }, { "epoch": 1.2, "grad_norm": 0.23302438855171204, "learning_rate": 1.741666666666667e-05, "loss": 0.006, "step": 660 }, { "epoch": 1.2181818181818183, "grad_norm": 0.22926795482635498, "learning_rate": 1.7333333333333336e-05, "loss": 0.0053, "step": 670 }, { "epoch": 1.2363636363636363, "grad_norm": 0.41634848713874817, "learning_rate": 1.7250000000000003e-05, "loss": 0.0096, "step": 680 }, { "epoch": 1.2545454545454544, "grad_norm": 0.7806673049926758, "learning_rate": 1.7166666666666666e-05, "loss": 0.0077, "step": 690 }, { "epoch": 1.2727272727272727, "grad_norm": 0.6627803444862366, "learning_rate": 1.7083333333333333e-05, "loss": 0.008, "step": 700 }, { "epoch": 1.290909090909091, "grad_norm": 0.33546727895736694, "learning_rate": 1.7e-05, "loss": 0.0074, "step": 710 }, { "epoch": 1.309090909090909, "grad_norm": 1.327726125717163, "learning_rate": 1.6916666666666667e-05, "loss": 0.0044, "step": 720 }, { "epoch": 1.3272727272727272, "grad_norm": 0.4449763894081116, "learning_rate": 1.6833333333333334e-05, "loss": 0.0046, "step": 730 }, { "epoch": 1.3454545454545455, "grad_norm": 0.2766354978084564, "learning_rate": 1.675e-05, "loss": 0.0034, "step": 740 }, { "epoch": 1.3636363636363638, "grad_norm": 0.830558180809021, "learning_rate": 1.6666666666666667e-05, "loss": 0.0074, "step": 750 }, { "epoch": 1.3818181818181818, "grad_norm": 1.0488086938858032, "learning_rate": 1.659166666666667e-05, "loss": 0.0065, "step": 760 }, { "epoch": 1.4, "grad_norm": 0.5093031525611877, "learning_rate": 1.6508333333333336e-05, "loss": 0.0053, "step": 770 }, { "epoch": 1.4181818181818182, "grad_norm": 0.3070843517780304, "learning_rate": 1.6425000000000003e-05, "loss": 0.0045, "step": 780 }, { "epoch": 1.4363636363636363, "grad_norm": 1.093131422996521, "learning_rate": 1.634166666666667e-05, "loss": 0.0058, "step": 790 }, { "epoch": 1.4545454545454546, "grad_norm": 2.2531373500823975, "learning_rate": 1.6258333333333333e-05, "loss": 0.0065, "step": 800 }, { "epoch": 1.4727272727272727, "grad_norm": 0.27250564098358154, "learning_rate": 1.6175e-05, "loss": 0.005, "step": 810 }, { "epoch": 1.490909090909091, "grad_norm": 2.3462181091308594, "learning_rate": 1.6091666666666667e-05, "loss": 0.0077, "step": 820 }, { "epoch": 1.509090909090909, "grad_norm": 0.5783445835113525, "learning_rate": 1.6008333333333334e-05, "loss": 0.0065, "step": 830 }, { "epoch": 1.5272727272727273, "grad_norm": 2.6000328063964844, "learning_rate": 1.5925e-05, "loss": 0.0073, "step": 840 }, { "epoch": 1.5454545454545454, "grad_norm": 0.27279505133628845, "learning_rate": 1.5841666666666668e-05, "loss": 0.0054, "step": 850 }, { "epoch": 1.5636363636363635, "grad_norm": 0.5557974576950073, "learning_rate": 1.5758333333333335e-05, "loss": 0.0054, "step": 860 }, { "epoch": 1.5818181818181818, "grad_norm": 0.4363366365432739, "learning_rate": 1.5675e-05, "loss": 0.0064, "step": 870 }, { "epoch": 1.6, "grad_norm": 0.055520545691251755, "learning_rate": 1.559166666666667e-05, "loss": 0.0036, "step": 880 }, { "epoch": 1.6181818181818182, "grad_norm": 0.31246721744537354, "learning_rate": 1.5508333333333335e-05, "loss": 0.0035, "step": 890 }, { "epoch": 1.6363636363636362, "grad_norm": 0.3083712160587311, "learning_rate": 1.5425000000000002e-05, "loss": 0.0043, "step": 900 }, { "epoch": 1.6363636363636362, "eval_f1": 0.9619188921859545, "eval_f2": 0.9814310223029569, "eval_loss": 0.007550612557679415, "eval_precision": 0.9310674964097654, "eval_recall": 0.9948849104859335, "eval_runtime": 11.1497, "eval_samples_per_second": 701.366, "eval_steps_per_second": 11.032, "step": 900 }, { "epoch": 1.6545454545454545, "grad_norm": 1.0350213050842285, "learning_rate": 1.534166666666667e-05, "loss": 0.0062, "step": 910 }, { "epoch": 1.6727272727272728, "grad_norm": 1.7585707902908325, "learning_rate": 1.5258333333333334e-05, "loss": 0.006, "step": 920 }, { "epoch": 1.690909090909091, "grad_norm": 0.35897427797317505, "learning_rate": 1.5175000000000001e-05, "loss": 0.0041, "step": 930 }, { "epoch": 1.709090909090909, "grad_norm": 0.2970544993877411, "learning_rate": 1.5091666666666668e-05, "loss": 0.0058, "step": 940 }, { "epoch": 1.7272727272727273, "grad_norm": 0.10311456769704819, "learning_rate": 1.5008333333333333e-05, "loss": 0.0057, "step": 950 }, { "epoch": 1.7454545454545456, "grad_norm": 1.624154806137085, "learning_rate": 1.4925e-05, "loss": 0.0088, "step": 960 }, { "epoch": 1.7636363636363637, "grad_norm": 0.22448480129241943, "learning_rate": 1.4841666666666667e-05, "loss": 0.0038, "step": 970 }, { "epoch": 1.7818181818181817, "grad_norm": 0.9474364519119263, "learning_rate": 1.4758333333333334e-05, "loss": 0.0044, "step": 980 }, { "epoch": 1.8, "grad_norm": 0.05209196358919144, "learning_rate": 1.4675000000000001e-05, "loss": 0.0064, "step": 990 }, { "epoch": 1.8181818181818183, "grad_norm": 0.3231663405895233, "learning_rate": 1.4591666666666668e-05, "loss": 0.005, "step": 1000 }, { "epoch": 1.8363636363636364, "grad_norm": 0.4507773220539093, "learning_rate": 1.4508333333333335e-05, "loss": 0.0047, "step": 1010 }, { "epoch": 1.8545454545454545, "grad_norm": 0.28643473982810974, "learning_rate": 1.4425e-05, "loss": 0.006, "step": 1020 }, { "epoch": 1.8727272727272726, "grad_norm": 0.30528539419174194, "learning_rate": 1.4341666666666667e-05, "loss": 0.0035, "step": 1030 }, { "epoch": 1.8909090909090909, "grad_norm": 0.7955114245414734, "learning_rate": 1.4258333333333334e-05, "loss": 0.0049, "step": 1040 }, { "epoch": 1.9090909090909092, "grad_norm": 0.6773081421852112, "learning_rate": 1.4175e-05, "loss": 0.0058, "step": 1050 }, { "epoch": 1.9272727272727272, "grad_norm": 0.5149025917053223, "learning_rate": 1.4091666666666668e-05, "loss": 0.0036, "step": 1060 }, { "epoch": 1.9454545454545453, "grad_norm": 0.5025485754013062, "learning_rate": 1.4008333333333334e-05, "loss": 0.0073, "step": 1070 }, { "epoch": 1.9636363636363636, "grad_norm": 0.6183115839958191, "learning_rate": 1.3925000000000001e-05, "loss": 0.0038, "step": 1080 }, { "epoch": 1.981818181818182, "grad_norm": 0.33286458253860474, "learning_rate": 1.3841666666666668e-05, "loss": 0.0047, "step": 1090 }, { "epoch": 2.0, "grad_norm": 0.766334056854248, "learning_rate": 1.3758333333333333e-05, "loss": 0.0083, "step": 1100 }, { "epoch": 2.018181818181818, "grad_norm": 0.23794743418693542, "learning_rate": 1.3675e-05, "loss": 0.0027, "step": 1110 }, { "epoch": 2.036363636363636, "grad_norm": 0.32537227869033813, "learning_rate": 1.3591666666666667e-05, "loss": 0.0019, "step": 1120 }, { "epoch": 2.0545454545454547, "grad_norm": 0.3572939932346344, "learning_rate": 1.3508333333333334e-05, "loss": 0.0023, "step": 1130 }, { "epoch": 2.0727272727272728, "grad_norm": 0.2717416286468506, "learning_rate": 1.3425000000000001e-05, "loss": 0.001, "step": 1140 }, { "epoch": 2.090909090909091, "grad_norm": 0.20052389800548553, "learning_rate": 1.3341666666666668e-05, "loss": 0.0017, "step": 1150 }, { "epoch": 2.109090909090909, "grad_norm": 0.1603120118379593, "learning_rate": 1.3258333333333335e-05, "loss": 0.0031, "step": 1160 }, { "epoch": 2.1272727272727274, "grad_norm": 0.15461675822734833, "learning_rate": 1.3175e-05, "loss": 0.0021, "step": 1170 }, { "epoch": 2.1454545454545455, "grad_norm": 0.7481666803359985, "learning_rate": 1.3091666666666667e-05, "loss": 0.0033, "step": 1180 }, { "epoch": 2.1636363636363636, "grad_norm": 0.3837297558784485, "learning_rate": 1.3008333333333334e-05, "loss": 0.0018, "step": 1190 }, { "epoch": 2.1818181818181817, "grad_norm": 0.09467964619398117, "learning_rate": 1.2925e-05, "loss": 0.0017, "step": 1200 }, { "epoch": 2.1818181818181817, "eval_f1": 0.9751068644707066, "eval_f2": 0.9850640113798008, "eval_loss": 0.009348779916763306, "eval_precision": 0.9589515331355094, "eval_recall": 0.9918158567774936, "eval_runtime": 10.6749, "eval_samples_per_second": 732.56, "eval_steps_per_second": 11.522, "step": 1200 }, { "epoch": 2.2, "grad_norm": 0.2888661324977875, "learning_rate": 1.2841666666666668e-05, "loss": 0.0017, "step": 1210 }, { "epoch": 2.2181818181818183, "grad_norm": 1.4225064516067505, "learning_rate": 1.2758333333333335e-05, "loss": 0.0017, "step": 1220 }, { "epoch": 2.2363636363636363, "grad_norm": 0.5475151538848877, "learning_rate": 1.2675000000000001e-05, "loss": 0.0022, "step": 1230 }, { "epoch": 2.2545454545454544, "grad_norm": 0.2563498914241791, "learning_rate": 1.2591666666666668e-05, "loss": 0.0026, "step": 1240 }, { "epoch": 2.2727272727272725, "grad_norm": 0.09335105866193771, "learning_rate": 1.2508333333333334e-05, "loss": 0.0013, "step": 1250 }, { "epoch": 2.290909090909091, "grad_norm": 0.08890422433614731, "learning_rate": 1.2425e-05, "loss": 0.0015, "step": 1260 }, { "epoch": 2.309090909090909, "grad_norm": 0.0670776441693306, "learning_rate": 1.2341666666666667e-05, "loss": 0.0015, "step": 1270 }, { "epoch": 2.327272727272727, "grad_norm": 0.09385448694229126, "learning_rate": 1.2258333333333334e-05, "loss": 0.0022, "step": 1280 }, { "epoch": 2.3454545454545457, "grad_norm": 0.31550052762031555, "learning_rate": 1.2175000000000001e-05, "loss": 0.0082, "step": 1290 }, { "epoch": 2.3636363636363638, "grad_norm": 0.14805318415164948, "learning_rate": 1.2091666666666668e-05, "loss": 0.003, "step": 1300 }, { "epoch": 2.381818181818182, "grad_norm": 0.5259885787963867, "learning_rate": 1.2008333333333335e-05, "loss": 0.0038, "step": 1310 }, { "epoch": 2.4, "grad_norm": 0.37176281213760376, "learning_rate": 1.1925e-05, "loss": 0.0017, "step": 1320 }, { "epoch": 2.418181818181818, "grad_norm": 0.33867013454437256, "learning_rate": 1.1841666666666667e-05, "loss": 0.0026, "step": 1330 }, { "epoch": 2.4363636363636365, "grad_norm": 0.04352513328194618, "learning_rate": 1.1758333333333334e-05, "loss": 0.002, "step": 1340 }, { "epoch": 2.4545454545454546, "grad_norm": 0.35843801498413086, "learning_rate": 1.1675000000000001e-05, "loss": 0.0028, "step": 1350 }, { "epoch": 2.4727272727272727, "grad_norm": 0.5411182045936584, "learning_rate": 1.1591666666666668e-05, "loss": 0.0019, "step": 1360 }, { "epoch": 2.4909090909090907, "grad_norm": 0.08427491784095764, "learning_rate": 1.1508333333333335e-05, "loss": 0.0017, "step": 1370 }, { "epoch": 2.509090909090909, "grad_norm": 0.27736711502075195, "learning_rate": 1.1425000000000002e-05, "loss": 0.0023, "step": 1380 }, { "epoch": 2.5272727272727273, "grad_norm": 0.06100330501794815, "learning_rate": 1.1341666666666668e-05, "loss": 0.0017, "step": 1390 }, { "epoch": 2.5454545454545454, "grad_norm": 0.07736339420080185, "learning_rate": 1.1258333333333334e-05, "loss": 0.0038, "step": 1400 }, { "epoch": 2.5636363636363635, "grad_norm": 0.15126390755176544, "learning_rate": 1.1175e-05, "loss": 0.0009, "step": 1410 }, { "epoch": 2.581818181818182, "grad_norm": 0.0258785467594862, "learning_rate": 1.1091666666666667e-05, "loss": 0.001, "step": 1420 }, { "epoch": 2.6, "grad_norm": 0.8827760815620422, "learning_rate": 1.1008333333333334e-05, "loss": 0.003, "step": 1430 }, { "epoch": 2.618181818181818, "grad_norm": 0.14176161587238312, "learning_rate": 1.0925000000000001e-05, "loss": 0.0023, "step": 1440 }, { "epoch": 2.6363636363636362, "grad_norm": 0.17485152184963226, "learning_rate": 1.0841666666666668e-05, "loss": 0.0023, "step": 1450 }, { "epoch": 2.6545454545454543, "grad_norm": 0.1624346673488617, "learning_rate": 1.0758333333333335e-05, "loss": 0.0016, "step": 1460 }, { "epoch": 2.672727272727273, "grad_norm": 0.32750600576400757, "learning_rate": 1.0675e-05, "loss": 0.0018, "step": 1470 }, { "epoch": 2.690909090909091, "grad_norm": 0.3369393050670624, "learning_rate": 1.0591666666666667e-05, "loss": 0.0047, "step": 1480 }, { "epoch": 2.709090909090909, "grad_norm": 0.19913850724697113, "learning_rate": 1.0508333333333334e-05, "loss": 0.0029, "step": 1490 }, { "epoch": 2.7272727272727275, "grad_norm": 0.6039556264877319, "learning_rate": 1.0425000000000001e-05, "loss": 0.0029, "step": 1500 }, { "epoch": 2.7272727272727275, "eval_f1": 0.9707426856714179, "eval_f2": 0.9838807785888077, "eval_loss": 0.008097349666059017, "eval_precision": 0.9496086105675147, "eval_recall": 0.992838874680307, "eval_runtime": 10.7985, "eval_samples_per_second": 724.178, "eval_steps_per_second": 11.391, "step": 1500 }, { "epoch": 2.7454545454545456, "grad_norm": 0.15625803172588348, "learning_rate": 1.0341666666666668e-05, "loss": 0.001, "step": 1510 }, { "epoch": 2.7636363636363637, "grad_norm": 0.5355175733566284, "learning_rate": 1.0258333333333335e-05, "loss": 0.0015, "step": 1520 }, { "epoch": 2.7818181818181817, "grad_norm": 0.054884154349565506, "learning_rate": 1.0175000000000002e-05, "loss": 0.0043, "step": 1530 }, { "epoch": 2.8, "grad_norm": 0.14347773790359497, "learning_rate": 1.0091666666666669e-05, "loss": 0.0012, "step": 1540 }, { "epoch": 2.8181818181818183, "grad_norm": 0.09393730759620667, "learning_rate": 1.0008333333333334e-05, "loss": 0.0026, "step": 1550 }, { "epoch": 2.8363636363636364, "grad_norm": 0.2671602964401245, "learning_rate": 9.925e-06, "loss": 0.0006, "step": 1560 }, { "epoch": 2.8545454545454545, "grad_norm": 0.04782993346452713, "learning_rate": 9.841666666666668e-06, "loss": 0.0011, "step": 1570 }, { "epoch": 2.8727272727272726, "grad_norm": 0.5545538067817688, "learning_rate": 9.758333333333334e-06, "loss": 0.0034, "step": 1580 }, { "epoch": 2.8909090909090907, "grad_norm": 0.18771076202392578, "learning_rate": 9.675000000000001e-06, "loss": 0.0014, "step": 1590 }, { "epoch": 2.909090909090909, "grad_norm": 0.022369615733623505, "learning_rate": 9.591666666666667e-06, "loss": 0.0007, "step": 1600 }, { "epoch": 2.9272727272727272, "grad_norm": 0.569296658039093, "learning_rate": 9.508333333333333e-06, "loss": 0.0016, "step": 1610 }, { "epoch": 2.9454545454545453, "grad_norm": 0.07517626136541367, "learning_rate": 9.425e-06, "loss": 0.0012, "step": 1620 }, { "epoch": 2.963636363636364, "grad_norm": 0.4265158772468567, "learning_rate": 9.341666666666667e-06, "loss": 0.0013, "step": 1630 }, { "epoch": 2.981818181818182, "grad_norm": 0.31167715787887573, "learning_rate": 9.258333333333334e-06, "loss": 0.0014, "step": 1640 }, { "epoch": 3.0, "grad_norm": 0.6325229406356812, "learning_rate": 9.175000000000001e-06, "loss": 0.0007, "step": 1650 }, { "epoch": 3.018181818181818, "grad_norm": 0.04929906874895096, "learning_rate": 9.091666666666668e-06, "loss": 0.0006, "step": 1660 }, { "epoch": 3.036363636363636, "grad_norm": 0.22075557708740234, "learning_rate": 9.008333333333335e-06, "loss": 0.0006, "step": 1670 }, { "epoch": 3.0545454545454547, "grad_norm": 0.2008703649044037, "learning_rate": 8.925e-06, "loss": 0.0018, "step": 1680 }, { "epoch": 3.0727272727272728, "grad_norm": 0.15318256616592407, "learning_rate": 8.841666666666667e-06, "loss": 0.0011, "step": 1690 }, { "epoch": 3.090909090909091, "grad_norm": 0.19851188361644745, "learning_rate": 8.758333333333334e-06, "loss": 0.0005, "step": 1700 }, { "epoch": 3.109090909090909, "grad_norm": 0.02609218843281269, "learning_rate": 8.675e-06, "loss": 0.0014, "step": 1710 }, { "epoch": 3.1272727272727274, "grad_norm": 0.02781720645725727, "learning_rate": 8.591666666666668e-06, "loss": 0.0004, "step": 1720 }, { "epoch": 3.1454545454545455, "grad_norm": 0.17195935547351837, "learning_rate": 8.508333333333335e-06, "loss": 0.0011, "step": 1730 }, { "epoch": 3.1636363636363636, "grad_norm": 0.04604584723711014, "learning_rate": 8.425000000000001e-06, "loss": 0.0017, "step": 1740 }, { "epoch": 3.1818181818181817, "grad_norm": 0.01334014069288969, "learning_rate": 8.341666666666667e-06, "loss": 0.0005, "step": 1750 }, { "epoch": 3.2, "grad_norm": 0.10181070119142532, "learning_rate": 8.258333333333334e-06, "loss": 0.0003, "step": 1760 }, { "epoch": 3.2181818181818183, "grad_norm": 0.029040852561593056, "learning_rate": 8.175e-06, "loss": 0.0002, "step": 1770 }, { "epoch": 3.2363636363636363, "grad_norm": 1.0948010683059692, "learning_rate": 8.091666666666667e-06, "loss": 0.0006, "step": 1780 }, { "epoch": 3.2545454545454544, "grad_norm": 0.19002945721149445, "learning_rate": 8.008333333333334e-06, "loss": 0.0008, "step": 1790 }, { "epoch": 3.2727272727272725, "grad_norm": 0.02836296707391739, "learning_rate": 7.925000000000001e-06, "loss": 0.0006, "step": 1800 }, { "epoch": 3.2727272727272725, "eval_f1": 0.9827235772357723, "eval_f2": 0.9866340169370472, "eval_loss": 0.0109314676374197, "eval_precision": 0.9762746087834427, "eval_recall": 0.9892583120204603, "eval_runtime": 10.4766, "eval_samples_per_second": 746.427, "eval_steps_per_second": 11.74, "step": 1800 }, { "epoch": 3.290909090909091, "grad_norm": 0.018972614780068398, "learning_rate": 7.841666666666668e-06, "loss": 0.001, "step": 1810 }, { "epoch": 3.309090909090909, "grad_norm": 0.003141665132716298, "learning_rate": 7.758333333333335e-06, "loss": 0.001, "step": 1820 }, { "epoch": 3.327272727272727, "grad_norm": 0.029703687876462936, "learning_rate": 7.675e-06, "loss": 0.0007, "step": 1830 }, { "epoch": 3.3454545454545457, "grad_norm": 0.18382185697555542, "learning_rate": 7.591666666666667e-06, "loss": 0.0004, "step": 1840 }, { "epoch": 3.3636363636363638, "grad_norm": 0.05236556753516197, "learning_rate": 7.508333333333334e-06, "loss": 0.002, "step": 1850 }, { "epoch": 3.381818181818182, "grad_norm": 0.17387185990810394, "learning_rate": 7.425000000000001e-06, "loss": 0.0009, "step": 1860 }, { "epoch": 3.4, "grad_norm": 0.008212663233280182, "learning_rate": 7.341666666666667e-06, "loss": 0.0007, "step": 1870 }, { "epoch": 3.418181818181818, "grad_norm": 0.22597701847553253, "learning_rate": 7.258333333333334e-06, "loss": 0.001, "step": 1880 }, { "epoch": 3.4363636363636365, "grad_norm": 0.07276669144630432, "learning_rate": 7.175000000000001e-06, "loss": 0.0017, "step": 1890 }, { "epoch": 3.4545454545454546, "grad_norm": 0.29078298807144165, "learning_rate": 7.091666666666667e-06, "loss": 0.0004, "step": 1900 }, { "epoch": 3.4727272727272727, "grad_norm": 0.11019200086593628, "learning_rate": 7.008333333333334e-06, "loss": 0.0005, "step": 1910 }, { "epoch": 3.4909090909090907, "grad_norm": 0.017450423911213875, "learning_rate": 6.925000000000001e-06, "loss": 0.0003, "step": 1920 }, { "epoch": 3.509090909090909, "grad_norm": 0.023930951952934265, "learning_rate": 6.8416666666666675e-06, "loss": 0.0013, "step": 1930 }, { "epoch": 3.5272727272727273, "grad_norm": 0.1692740023136139, "learning_rate": 6.7583333333333336e-06, "loss": 0.0033, "step": 1940 }, { "epoch": 3.5454545454545454, "grad_norm": 0.031825270503759384, "learning_rate": 6.6750000000000005e-06, "loss": 0.0008, "step": 1950 }, { "epoch": 3.5636363636363635, "grad_norm": 0.004583127796649933, "learning_rate": 6.591666666666667e-06, "loss": 0.0005, "step": 1960 }, { "epoch": 3.581818181818182, "grad_norm": 0.19434763491153717, "learning_rate": 6.508333333333334e-06, "loss": 0.0003, "step": 1970 }, { "epoch": 3.6, "grad_norm": 0.007167825475335121, "learning_rate": 6.425e-06, "loss": 0.0002, "step": 1980 }, { "epoch": 3.618181818181818, "grad_norm": 0.24422968924045563, "learning_rate": 6.341666666666667e-06, "loss": 0.0003, "step": 1990 }, { "epoch": 3.6363636363636362, "grad_norm": 0.00559116480872035, "learning_rate": 6.258333333333334e-06, "loss": 0.0001, "step": 2000 }, { "epoch": 3.6545454545454543, "grad_norm": 0.3058757185935974, "learning_rate": 6.175000000000001e-06, "loss": 0.0015, "step": 2010 }, { "epoch": 3.672727272727273, "grad_norm": 0.008120411075651646, "learning_rate": 6.091666666666667e-06, "loss": 0.0008, "step": 2020 }, { "epoch": 3.690909090909091, "grad_norm": 0.007178381085395813, "learning_rate": 6.008333333333334e-06, "loss": 0.0003, "step": 2030 }, { "epoch": 3.709090909090909, "grad_norm": 0.12139607220888138, "learning_rate": 5.925000000000001e-06, "loss": 0.0001, "step": 2040 }, { "epoch": 3.7272727272727275, "grad_norm": 0.16555677354335785, "learning_rate": 5.841666666666667e-06, "loss": 0.0004, "step": 2050 }, { "epoch": 3.7454545454545456, "grad_norm": 0.08208701014518738, "learning_rate": 5.758333333333334e-06, "loss": 0.0002, "step": 2060 }, { "epoch": 3.7636363636363637, "grad_norm": 0.0696110725402832, "learning_rate": 5.675000000000001e-06, "loss": 0.0008, "step": 2070 }, { "epoch": 3.7818181818181817, "grad_norm": 0.019171856343746185, "learning_rate": 5.591666666666668e-06, "loss": 0.0009, "step": 2080 }, { "epoch": 3.8, "grad_norm": 0.011577253229916096, "learning_rate": 5.508333333333334e-06, "loss": 0.0007, "step": 2090 }, { "epoch": 3.8181818181818183, "grad_norm": 0.1947954148054123, "learning_rate": 5.4250000000000006e-06, "loss": 0.0005, "step": 2100 }, { "epoch": 3.8181818181818183, "eval_f1": 0.9789500380420999, "eval_f2": 0.9838907014681892, "eval_loss": 0.012196212075650692, "eval_precision": 0.9708249496981891, "eval_recall": 0.9872122762148338, "eval_runtime": 10.6686, "eval_samples_per_second": 732.992, "eval_steps_per_second": 11.529, "step": 2100 } ], "logging_steps": 10, "max_steps": 2750, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 300, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.07084650174464e+16, "train_batch_size": 64, "trial_name": null, "trial_params": null }