anudit commited on 8 days ago

Commit

2bbbad8

verified ·

1 Parent(s): f6a50d0

Update to deberta-featattn-20260623-225422 (RAID AUROC 0.982)

Browse files

Files changed (23) hide show

checkpoint-5500/model.safetensors +3 -0
checkpoint-5500/optimizer.pt +3 -0
checkpoint-5500/rng_state.pth +3 -0
checkpoint-5500/scheduler.pt +3 -0
checkpoint-5500/trainer_state.json +1726 -0
checkpoint-5500/training_args.bin +3 -0
checkpoint-5626/model.safetensors +3 -0
checkpoint-5626/optimizer.pt +3 -0
checkpoint-5626/rng_state.pth +3 -0
checkpoint-5626/scheduler.pt +3 -0
checkpoint-5626/trainer_state.json +1774 -0
checkpoint-5626/training_args.bin +3 -0
meta.json +2 -2
onnx/detector_config.json +10 -0
onnx/meta.json +41 -0
onnx/model_fp16.onnx +1 -1
onnx/model_fp32.onnx +1 -1
onnx/model_int8.onnx +1 -1
onnx/model_q4.onnx +1 -1
onnx/raid_results.json +153 -0
onnx/raid_submission.json +0 -0
pytorch_model.bin +3 -0
raid_results.json +153 -0

checkpoint-5500/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2fe3b8087441254064eef4dd6b0f784ea859a81c1f82c6f95d267dff205e2014
+size 736795940

checkpoint-5500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2085e2dc024122d4670129270c990948737f20ceeee05e9a20c058e1300239af
+size 1473711115

checkpoint-5500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b1c70b20302e039ff922ef92da23103cbd68279d464265f819dc67cd09814988
+size 14391

checkpoint-5500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc6aef81b6056fc9eeb9dde4ada035fd61be9ef8265329b42c068420e3be8d7b
+size 1529

checkpoint-5500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1726 @@

+{
+  "best_global_step": 5500,
+  "best_metric": 0.9973359040925472,
+  "best_model_checkpoint": "/Users/anudit/Documents/GitHub/slopdetector/checkpoints/deberta-featattn-20260623-225422/checkpoint-5500",
+  "epoch": 0.9776039815143974,
+  "eval_steps": 500,
+  "global_step": 5500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.004443654461429079,
+      "grad_norm": 3.013444662094116,
+      "learning_rate": 1.4201183431952664e-06,
+      "loss": 0.5004017639160157,
+      "step": 25
+    },
+    {
+      "epoch": 0.008887308922858158,
+      "grad_norm": 2.297563076019287,
+      "learning_rate": 2.8994082840236688e-06,
+      "loss": 0.3888048934936523,
+      "step": 50
+    },
+    {
+      "epoch": 0.013330963384287239,
+      "grad_norm": 1.134047508239746,
+      "learning_rate": 4.3786982248520715e-06,
+      "loss": 0.2801882553100586,
+      "step": 75
+    },
+    {
+      "epoch": 0.017774617845716316,
+      "grad_norm": 0.9834569096565247,
+      "learning_rate": 5.857988165680474e-06,
+      "loss": 0.24047454833984375,
+      "step": 100
+    },
+    {
+      "epoch": 0.022218272307145397,
+      "grad_norm": 1.117211103439331,
+      "learning_rate": 7.337278106508876e-06,
+      "loss": 0.2291146469116211,
+      "step": 125
+    },
+    {
+      "epoch": 0.026661926768574477,
+      "grad_norm": 0.8506172895431519,
+      "learning_rate": 8.816568047337279e-06,
+      "loss": 0.1967698097229004,
+      "step": 150
+    },
+    {
+      "epoch": 0.031105581230003555,
+      "grad_norm": 0.7021474242210388,
+      "learning_rate": 1.029585798816568e-05,
+      "loss": 0.17146373748779298,
+      "step": 175
+    },
+    {
+      "epoch": 0.03554923569143263,
+      "grad_norm": 1.2111107110977173,
+      "learning_rate": 1.1775147928994083e-05,
+      "loss": 0.13825268745422364,
+      "step": 200
+    },
+    {
+      "epoch": 0.03999289015286171,
+      "grad_norm": 1.9403120279312134,
+      "learning_rate": 1.3254437869822488e-05,
+      "loss": 0.12618659019470216,
+      "step": 225
+    },
+    {
+      "epoch": 0.04443654461429079,
+      "grad_norm": 1.8931593894958496,
+      "learning_rate": 1.4733727810650888e-05,
+      "loss": 0.10122986793518067,
+      "step": 250
+    },
+    {
+      "epoch": 0.048880199075719874,
+      "grad_norm": 1.8619073629379272,
+      "learning_rate": 1.621301775147929e-05,
+      "loss": 0.07919074535369873,
+      "step": 275
+    },
+    {
+      "epoch": 0.053323853537148955,
+      "grad_norm": 1.9105793237686157,
+      "learning_rate": 1.7692307692307694e-05,
+      "loss": 0.08386680603027344,
+      "step": 300
+    },
+    {
+      "epoch": 0.05776750799857803,
+      "grad_norm": 2.0332772731781006,
+      "learning_rate": 1.9171597633136098e-05,
+      "loss": 0.08702397346496582,
+      "step": 325
+    },
+    {
+      "epoch": 0.06221116246000711,
+      "grad_norm": 0.9020377993583679,
+      "learning_rate": 1.995839636913767e-05,
+      "loss": 0.06874918460845947,
+      "step": 350
+    },
+    {
+      "epoch": 0.06665481692143618,
+      "grad_norm": 1.6216212511062622,
+      "learning_rate": 1.9863842662632376e-05,
+      "loss": 0.06805606842041016,
+      "step": 375
+    },
+    {
+      "epoch": 0.07109847138286526,
+      "grad_norm": 1.7693337202072144,
+      "learning_rate": 1.9769288956127082e-05,
+      "loss": 0.06352178573608398,
+      "step": 400
+    },
+    {
+      "epoch": 0.07554212584429434,
+      "grad_norm": 1.6724389791488647,
+      "learning_rate": 1.9674735249621784e-05,
+      "loss": 0.0673055648803711,
+      "step": 425
+    },
+    {
+      "epoch": 0.07998578030572343,
+      "grad_norm": 0.5278561115264893,
+      "learning_rate": 1.9580181543116493e-05,
+      "loss": 0.06466075897216797,
+      "step": 450
+    },
+    {
+      "epoch": 0.0844294347671525,
+      "grad_norm": 1.7042737007141113,
+      "learning_rate": 1.9485627836611195e-05,
+      "loss": 0.0630407428741455,
+      "step": 475
+    },
+    {
+      "epoch": 0.08887308922858159,
+      "grad_norm": 0.3513544797897339,
+      "learning_rate": 1.93910741301059e-05,
+      "loss": 0.062327189445495604,
+      "step": 500
+    },
+    {
+      "epoch": 0.08887308922858159,
+      "eval_accuracy": 0.848,
+      "eval_auroc": 0.9866655199587185,
+      "eval_f1": 0.8685121107266436,
+      "eval_loss": 0.054977674037218094,
+      "eval_runtime": 39.8938,
+      "eval_samples_per_second": 50.133,
+      "eval_steps_per_second": 1.579,
+      "eval_tpr_at_fpr1": 0.7455268389662028,
+      "eval_tpr_at_fpr5": 0.937375745526839,
+      "step": 500
+    },
+    {
+      "epoch": 0.09331674369001067,
+      "grad_norm": 2.1725878715515137,
+      "learning_rate": 1.9296520423600606e-05,
+      "loss": 0.05081462860107422,
+      "step": 525
+    },
+    {
+      "epoch": 0.09776039815143975,
+      "grad_norm": 2.587542772293091,
+      "learning_rate": 1.9201966717095312e-05,
+      "loss": 0.06098108291625977,
+      "step": 550
+    },
+    {
+      "epoch": 0.10220405261286883,
+      "grad_norm": 1.1815265417099,
+      "learning_rate": 1.9107413010590018e-05,
+      "loss": 0.04866991996765137,
+      "step": 575
+    },
+    {
+      "epoch": 0.10664770707429791,
+      "grad_norm": 1.140872597694397,
+      "learning_rate": 1.901285930408472e-05,
+      "loss": 0.06058640956878662,
+      "step": 600
+    },
+    {
+      "epoch": 0.11109136153572698,
+      "grad_norm": 1.164772868156433,
+      "learning_rate": 1.891830559757943e-05,
+      "loss": 0.0485923957824707,
+      "step": 625
+    },
+    {
+      "epoch": 0.11553501599715606,
+      "grad_norm": 2.076003074645996,
+      "learning_rate": 1.882375189107413e-05,
+      "loss": 0.05198529243469238,
+      "step": 650
+    },
+    {
+      "epoch": 0.11997867045858514,
+      "grad_norm": 2.8677966594696045,
+      "learning_rate": 1.8729198184568836e-05,
+      "loss": 0.05215679168701172,
+      "step": 675
+    },
+    {
+      "epoch": 0.12442232492001422,
+      "grad_norm": 1.243391752243042,
+      "learning_rate": 1.8634644478063542e-05,
+      "loss": 0.046974472999572754,
+      "step": 700
+    },
+    {
+      "epoch": 0.12886597938144329,
+      "grad_norm": 1.970794916152954,
+      "learning_rate": 1.8540090771558244e-05,
+      "loss": 0.051630439758300783,
+      "step": 725
+    },
+    {
+      "epoch": 0.13330963384287237,
+      "grad_norm": 1.031387448310852,
+      "learning_rate": 1.8445537065052953e-05,
+      "loss": 0.04577981948852539,
+      "step": 750
+    },
+    {
+      "epoch": 0.13775328830430145,
+      "grad_norm": 1.4441957473754883,
+      "learning_rate": 1.8350983358547655e-05,
+      "loss": 0.05677220821380615,
+      "step": 775
+    },
+    {
+      "epoch": 0.14219694276573053,
+      "grad_norm": 1.2302734851837158,
+      "learning_rate": 1.825642965204236e-05,
+      "loss": 0.043911681175231934,
+      "step": 800
+    },
+    {
+      "epoch": 0.1466405972271596,
+      "grad_norm": 0.9389927983283997,
+      "learning_rate": 1.8161875945537066e-05,
+      "loss": 0.04272346019744873,
+      "step": 825
+    },
+    {
+      "epoch": 0.1510842516885887,
+      "grad_norm": 1.342290997505188,
+      "learning_rate": 1.8067322239031772e-05,
+      "loss": 0.054392943382263186,
+      "step": 850
+    },
+    {
+      "epoch": 0.15552790615001777,
+      "grad_norm": 2.6409666538238525,
+      "learning_rate": 1.7972768532526477e-05,
+      "loss": 0.04197061061859131,
+      "step": 875
+    },
+    {
+      "epoch": 0.15997156061144685,
+      "grad_norm": 1.1038918495178223,
+      "learning_rate": 1.787821482602118e-05,
+      "loss": 0.03587212562561035,
+      "step": 900
+    },
+    {
+      "epoch": 0.16441521507287593,
+      "grad_norm": 1.414070725440979,
+      "learning_rate": 1.778366111951589e-05,
+      "loss": 0.04636185646057129,
+      "step": 925
+    },
+    {
+      "epoch": 0.168858869534305,
+      "grad_norm": 2.164773941040039,
+      "learning_rate": 1.768910741301059e-05,
+      "loss": 0.044623188972473145,
+      "step": 950
+    },
+    {
+      "epoch": 0.1733025239957341,
+      "grad_norm": 2.06410813331604,
+      "learning_rate": 1.7594553706505296e-05,
+      "loss": 0.038699045181274414,
+      "step": 975
+    },
+    {
+      "epoch": 0.17774617845716317,
+      "grad_norm": 1.53926420211792,
+      "learning_rate": 1.7500000000000002e-05,
+      "loss": 0.038100283145904544,
+      "step": 1000
+    },
+    {
+      "epoch": 0.17774617845716317,
+      "eval_accuracy": 0.89,
+      "eval_auroc": 0.9912356844846415,
+      "eval_f1": 0.9012567324955117,
+      "eval_loss": 0.042472898960113525,
+      "eval_runtime": 38.57,
+      "eval_samples_per_second": 51.854,
+      "eval_steps_per_second": 1.633,
+      "eval_tpr_at_fpr1": 0.8230616302186878,
+      "eval_tpr_at_fpr5": 0.952286282306163,
+      "step": 1000
+    },
+    {
+      "epoch": 0.18218983291859225,
+      "grad_norm": 0.9646220803260803,
+      "learning_rate": 1.7405446293494704e-05,
+      "loss": 0.03173836708068847,
+      "step": 1025
+    },
+    {
+      "epoch": 0.18663348738002133,
+      "grad_norm": 1.0528196096420288,
+      "learning_rate": 1.7310892586989413e-05,
+      "loss": 0.03988344669342041,
+      "step": 1050
+    },
+    {
+      "epoch": 0.19107714184145042,
+      "grad_norm": 1.5726221799850464,
+      "learning_rate": 1.7216338880484115e-05,
+      "loss": 0.044674863815307615,
+      "step": 1075
+    },
+    {
+      "epoch": 0.1955207963028795,
+      "grad_norm": 1.551660418510437,
+      "learning_rate": 1.712178517397882e-05,
+      "loss": 0.040711288452148435,
+      "step": 1100
+    },
+    {
+      "epoch": 0.19996445076430858,
+      "grad_norm": 1.090385913848877,
+      "learning_rate": 1.7027231467473526e-05,
+      "loss": 0.037872114181518556,
+      "step": 1125
+    },
+    {
+      "epoch": 0.20440810522573766,
+      "grad_norm": 1.1778202056884766,
+      "learning_rate": 1.6932677760968232e-05,
+      "loss": 0.039130420684814454,
+      "step": 1150
+    },
+    {
+      "epoch": 0.20885175968716674,
+      "grad_norm": 1.402064323425293,
+      "learning_rate": 1.6838124054462937e-05,
+      "loss": 0.03875999212265015,
+      "step": 1175
+    },
+    {
+      "epoch": 0.21329541414859582,
+      "grad_norm": 0.7979677319526672,
+      "learning_rate": 1.674357034795764e-05,
+      "loss": 0.033257806301116945,
+      "step": 1200
+    },
+    {
+      "epoch": 0.21773906861002487,
+      "grad_norm": 2.5630085468292236,
+      "learning_rate": 1.664901664145235e-05,
+      "loss": 0.03850275993347168,
+      "step": 1225
+    },
+    {
+      "epoch": 0.22218272307145395,
+      "grad_norm": 1.1255887746810913,
+      "learning_rate": 1.655446293494705e-05,
+      "loss": 0.035763952732086185,
+      "step": 1250
+    },
+    {
+      "epoch": 0.22662637753288303,
+      "grad_norm": 2.648975133895874,
+      "learning_rate": 1.6459909228441756e-05,
+      "loss": 0.04280531883239746,
+      "step": 1275
+    },
+    {
+      "epoch": 0.23107003199431211,
+      "grad_norm": 1.7409067153930664,
+      "learning_rate": 1.6365355521936462e-05,
+      "loss": 0.04235891819000244,
+      "step": 1300
+    },
+    {
+      "epoch": 0.2355136864557412,
+      "grad_norm": 1.5755038261413574,
+      "learning_rate": 1.6270801815431164e-05,
+      "loss": 0.04084760665893555,
+      "step": 1325
+    },
+    {
+      "epoch": 0.23995734091717028,
+      "grad_norm": 1.2442480325698853,
+      "learning_rate": 1.6176248108925873e-05,
+      "loss": 0.03942857027053833,
+      "step": 1350
+    },
+    {
+      "epoch": 0.24440099537859936,
+      "grad_norm": 0.7775816321372986,
+      "learning_rate": 1.6081694402420575e-05,
+      "loss": 0.039197320938110354,
+      "step": 1375
+    },
+    {
+      "epoch": 0.24884464984002844,
+      "grad_norm": 0.44854021072387695,
+      "learning_rate": 1.598714069591528e-05,
+      "loss": 0.030667483806610107,
+      "step": 1400
+    },
+    {
+      "epoch": 0.25328830430145755,
+      "grad_norm": 0.9631138443946838,
+      "learning_rate": 1.5892586989409986e-05,
+      "loss": 0.03460927009582519,
+      "step": 1425
+    },
+    {
+      "epoch": 0.25773195876288657,
+      "grad_norm": 0.8312052488327026,
+      "learning_rate": 1.5798033282904692e-05,
+      "loss": 0.03320029735565186,
+      "step": 1450
+    },
+    {
+      "epoch": 0.26217561322431565,
+      "grad_norm": 1.1160472631454468,
+      "learning_rate": 1.5703479576399397e-05,
+      "loss": 0.03198946952819824,
+      "step": 1475
+    },
+    {
+      "epoch": 0.26661926768574473,
+      "grad_norm": 1.6029430627822876,
+      "learning_rate": 1.56089258698941e-05,
+      "loss": 0.034000282287597654,
+      "step": 1500
+    },
+    {
+      "epoch": 0.26661926768574473,
+      "eval_accuracy": 0.8785,
+      "eval_auroc": 0.9909046725682125,
+      "eval_f1": 0.8921438082556591,
+      "eval_loss": 0.04817873612046242,
+      "eval_runtime": 44.1179,
+      "eval_samples_per_second": 45.333,
+      "eval_steps_per_second": 1.428,
+      "eval_tpr_at_fpr1": 0.8021868787276342,
+      "eval_tpr_at_fpr5": 0.9572564612326043,
+      "step": 1500
+    },
+    {
+      "epoch": 0.2710629221471738,
+      "grad_norm": 2.1138088703155518,
+      "learning_rate": 1.5514372163388805e-05,
+      "loss": 0.03080030918121338,
+      "step": 1525
+    },
+    {
+      "epoch": 0.2755065766086029,
+      "grad_norm": 2.206002950668335,
+      "learning_rate": 1.541981845688351e-05,
+      "loss": 0.04029146194458008,
+      "step": 1550
+    },
+    {
+      "epoch": 0.279950231070032,
+      "grad_norm": 1.8083148002624512,
+      "learning_rate": 1.5325264750378216e-05,
+      "loss": 0.028056590557098388,
+      "step": 1575
+    },
+    {
+      "epoch": 0.28439388553146105,
+      "grad_norm": 0.9714005589485168,
+      "learning_rate": 1.5230711043872922e-05,
+      "loss": 0.033678176403045657,
+      "step": 1600
+    },
+    {
+      "epoch": 0.28883753999289014,
+      "grad_norm": 0.29741403460502625,
+      "learning_rate": 1.5136157337367626e-05,
+      "loss": 0.03180084943771362,
+      "step": 1625
+    },
+    {
+      "epoch": 0.2932811944543192,
+      "grad_norm": 0.516327440738678,
+      "learning_rate": 1.5041603630862331e-05,
+      "loss": 0.03431586980819702,
+      "step": 1650
+    },
+    {
+      "epoch": 0.2977248489157483,
+      "grad_norm": 0.7245854735374451,
+      "learning_rate": 1.4947049924357035e-05,
+      "loss": 0.03058172941207886,
+      "step": 1675
+    },
+    {
+      "epoch": 0.3021685033771774,
+      "grad_norm": 2.4247725009918213,
+      "learning_rate": 1.4852496217851742e-05,
+      "loss": 0.04614161014556885,
+      "step": 1700
+    },
+    {
+      "epoch": 0.30661215783860646,
+      "grad_norm": 1.5023690462112427,
+      "learning_rate": 1.4757942511346446e-05,
+      "loss": 0.03227074861526489,
+      "step": 1725
+    },
+    {
+      "epoch": 0.31105581230003554,
+      "grad_norm": 0.5162255764007568,
+      "learning_rate": 1.466338880484115e-05,
+      "loss": 0.037947914600372314,
+      "step": 1750
+    },
+    {
+      "epoch": 0.3154994667614646,
+      "grad_norm": 0.8927256464958191,
+      "learning_rate": 1.4568835098335856e-05,
+      "loss": 0.04013613700866699,
+      "step": 1775
+    },
+    {
+      "epoch": 0.3199431212228937,
+      "grad_norm": 1.9027554988861084,
+      "learning_rate": 1.447428139183056e-05,
+      "loss": 0.04084087371826172,
+      "step": 1800
+    },
+    {
+      "epoch": 0.3243867756843228,
+      "grad_norm": 1.0974030494689941,
+      "learning_rate": 1.4379727685325267e-05,
+      "loss": 0.02415942192077637,
+      "step": 1825
+    },
+    {
+      "epoch": 0.32883043014575186,
+      "grad_norm": 1.2848249673843384,
+      "learning_rate": 1.428517397881997e-05,
+      "loss": 0.03221212863922119,
+      "step": 1850
+    },
+    {
+      "epoch": 0.33327408460718094,
+      "grad_norm": 0.8059474229812622,
+      "learning_rate": 1.4190620272314676e-05,
+      "loss": 0.037272207736968994,
+      "step": 1875
+    },
+    {
+      "epoch": 0.33771773906861,
+      "grad_norm": 1.0132513046264648,
+      "learning_rate": 1.4096066565809382e-05,
+      "loss": 0.029253509044647217,
+      "step": 1900
+    },
+    {
+      "epoch": 0.3421613935300391,
+      "grad_norm": 0.7545719742774963,
+      "learning_rate": 1.4001512859304086e-05,
+      "loss": 0.03997385501861572,
+      "step": 1925
+    },
+    {
+      "epoch": 0.3466050479914682,
+      "grad_norm": 0.37751272320747375,
+      "learning_rate": 1.3906959152798791e-05,
+      "loss": 0.027481729984283446,
+      "step": 1950
+    },
+    {
+      "epoch": 0.35104870245289727,
+      "grad_norm": 1.522934079170227,
+      "learning_rate": 1.3812405446293495e-05,
+      "loss": 0.028633484840393065,
+      "step": 1975
+    },
+    {
+      "epoch": 0.35549235691432635,
+      "grad_norm": 1.2354328632354736,
+      "learning_rate": 1.3717851739788202e-05,
+      "loss": 0.022525691986083986,
+      "step": 2000
+    },
+    {
+      "epoch": 0.35549235691432635,
+      "eval_accuracy": 0.936,
+      "eval_auroc": 0.9948458144493202,
+      "eval_f1": 0.9399624765478424,
+      "eval_loss": 0.030829520896077156,
+      "eval_runtime": 40.0972,
+      "eval_samples_per_second": 49.879,
+      "eval_steps_per_second": 1.571,
+      "eval_tpr_at_fpr1": 0.9125248508946322,
+      "eval_tpr_at_fpr5": 0.9781312127236581,
+      "step": 2000
+    },
+    {
+      "epoch": 0.3599360113757554,
+      "grad_norm": 1.0933098793029785,
+      "learning_rate": 1.3623298033282906e-05,
+      "loss": 0.03529852867126465,
+      "step": 2025
+    },
+    {
+      "epoch": 0.3643796658371845,
+      "grad_norm": 2.0476551055908203,
+      "learning_rate": 1.352874432677761e-05,
+      "loss": 0.03386972665786743,
+      "step": 2050
+    },
+    {
+      "epoch": 0.3688233202986136,
+      "grad_norm": 0.9298884868621826,
+      "learning_rate": 1.3434190620272315e-05,
+      "loss": 0.03486936330795288,
+      "step": 2075
+    },
+    {
+      "epoch": 0.37326697476004267,
+      "grad_norm": 0.7512989044189453,
+      "learning_rate": 1.333963691376702e-05,
+      "loss": 0.0267183780670166,
+      "step": 2100
+    },
+    {
+      "epoch": 0.37771062922147175,
+      "grad_norm": 1.4821196794509888,
+      "learning_rate": 1.3245083207261727e-05,
+      "loss": 0.021263403892517088,
+      "step": 2125
+    },
+    {
+      "epoch": 0.38215428368290083,
+      "grad_norm": 0.8745072484016418,
+      "learning_rate": 1.315052950075643e-05,
+      "loss": 0.0272180438041687,
+      "step": 2150
+    },
+    {
+      "epoch": 0.3865979381443299,
+      "grad_norm": 1.6741865873336792,
+      "learning_rate": 1.3055975794251136e-05,
+      "loss": 0.026980955600738526,
+      "step": 2175
+    },
+    {
+      "epoch": 0.391041592605759,
+      "grad_norm": 0.8200652599334717,
+      "learning_rate": 1.2961422087745842e-05,
+      "loss": 0.027142252922058106,
+      "step": 2200
+    },
+    {
+      "epoch": 0.3954852470671881,
+      "grad_norm": 1.5616494417190552,
+      "learning_rate": 1.2866868381240545e-05,
+      "loss": 0.030536642074584962,
+      "step": 2225
+    },
+    {
+      "epoch": 0.39992890152861715,
+      "grad_norm": 0.7505294680595398,
+      "learning_rate": 1.2772314674735251e-05,
+      "loss": 0.03007654905319214,
+      "step": 2250
+    },
+    {
+      "epoch": 0.40437255599004623,
+      "grad_norm": 0.3857294023036957,
+      "learning_rate": 1.2677760968229955e-05,
+      "loss": 0.02641111135482788,
+      "step": 2275
+    },
+    {
+      "epoch": 0.4088162104514753,
+      "grad_norm": 0.9879816174507141,
+      "learning_rate": 1.2583207261724662e-05,
+      "loss": 0.027107694149017335,
+      "step": 2300
+    },
+    {
+      "epoch": 0.4132598649129044,
+      "grad_norm": 0.5398420095443726,
+      "learning_rate": 1.2488653555219366e-05,
+      "loss": 0.027692139148712158,
+      "step": 2325
+    },
+    {
+      "epoch": 0.4177035193743335,
+      "grad_norm": 0.8365870118141174,
+      "learning_rate": 1.239409984871407e-05,
+      "loss": 0.029139807224273683,
+      "step": 2350
+    },
+    {
+      "epoch": 0.42214717383576256,
+      "grad_norm": 1.1654356718063354,
+      "learning_rate": 1.2299546142208775e-05,
+      "loss": 0.025624027252197267,
+      "step": 2375
+    },
+    {
+      "epoch": 0.42659082829719164,
+      "grad_norm": 0.8927724361419678,
+      "learning_rate": 1.220499243570348e-05,
+      "loss": 0.032838408946990964,
+      "step": 2400
+    },
+    {
+      "epoch": 0.43103448275862066,
+      "grad_norm": 1.7535399198532104,
+      "learning_rate": 1.2110438729198187e-05,
+      "loss": 0.03213581085205078,
+      "step": 2425
+    },
+    {
+      "epoch": 0.43547813722004974,
+      "grad_norm": 1.506422996520996,
+      "learning_rate": 1.201588502269289e-05,
+      "loss": 0.031965067386627195,
+      "step": 2450
+    },
+    {
+      "epoch": 0.4399217916814788,
+      "grad_norm": 1.933950424194336,
+      "learning_rate": 1.1921331316187596e-05,
+      "loss": 0.02685023784637451,
+      "step": 2475
+    },
+    {
+      "epoch": 0.4443654461429079,
+      "grad_norm": 0.8511770963668823,
+      "learning_rate": 1.18267776096823e-05,
+      "loss": 0.03357476472854614,
+      "step": 2500
+    },
+    {
+      "epoch": 0.4443654461429079,
+      "eval_accuracy": 0.9095,
+      "eval_auroc": 0.9952128276617958,
+      "eval_f1": 0.9173893199452305,
+      "eval_loss": 0.030984506011009216,
+      "eval_runtime": 40.8627,
+      "eval_samples_per_second": 48.944,
+      "eval_steps_per_second": 1.542,
+      "eval_tpr_at_fpr1": 0.856858846918489,
+      "eval_tpr_at_fpr5": 0.9821073558648111,
+      "step": 2500
+    },
+    {
+      "epoch": 0.448809100604337,
+      "grad_norm": 1.3411929607391357,
+      "learning_rate": 1.1732223903177005e-05,
+      "loss": 0.026271984577178956,
+      "step": 2525
+    },
+    {
+      "epoch": 0.45325275506576607,
+      "grad_norm": 1.3650128841400146,
+      "learning_rate": 1.1637670196671711e-05,
+      "loss": 0.030547237396240233,
+      "step": 2550
+    },
+    {
+      "epoch": 0.45769640952719515,
+      "grad_norm": 0.7035048007965088,
+      "learning_rate": 1.1543116490166415e-05,
+      "loss": 0.026542000770568848,
+      "step": 2575
+    },
+    {
+      "epoch": 0.46214006398862423,
+      "grad_norm": 1.3388855457305908,
+      "learning_rate": 1.1448562783661122e-05,
+      "loss": 0.024521036148071287,
+      "step": 2600
+    },
+    {
+      "epoch": 0.4665837184500533,
+      "grad_norm": 1.0085132122039795,
+      "learning_rate": 1.1354009077155826e-05,
+      "loss": 0.024952406883239745,
+      "step": 2625
+    },
+    {
+      "epoch": 0.4710273729114824,
+      "grad_norm": 0.30464261770248413,
+      "learning_rate": 1.125945537065053e-05,
+      "loss": 0.02288907766342163,
+      "step": 2650
+    },
+    {
+      "epoch": 0.47547102737291147,
+      "grad_norm": 0.6784248948097229,
+      "learning_rate": 1.1164901664145235e-05,
+      "loss": 0.024585678577423095,
+      "step": 2675
+    },
+    {
+      "epoch": 0.47991468183434055,
+      "grad_norm": 1.2737281322479248,
+      "learning_rate": 1.107034795763994e-05,
+      "loss": 0.028601126670837404,
+      "step": 2700
+    },
+    {
+      "epoch": 0.48435833629576963,
+      "grad_norm": 1.2060391902923584,
+      "learning_rate": 1.0975794251134646e-05,
+      "loss": 0.03009690284729004,
+      "step": 2725
+    },
+    {
+      "epoch": 0.4888019907571987,
+      "grad_norm": 0.9331129789352417,
+      "learning_rate": 1.088124054462935e-05,
+      "loss": 0.024897255897521973,
+      "step": 2750
+    },
+    {
+      "epoch": 0.4932456452186278,
+      "grad_norm": 0.7035834789276123,
+      "learning_rate": 1.0786686838124056e-05,
+      "loss": 0.029437661170959473,
+      "step": 2775
+    },
+    {
+      "epoch": 0.4976892996800569,
+      "grad_norm": 1.3447843790054321,
+      "learning_rate": 1.069213313161876e-05,
+      "loss": 0.024274458885192873,
+      "step": 2800
+    },
+    {
+      "epoch": 0.502132954141486,
+      "grad_norm": 0.7223392724990845,
+      "learning_rate": 1.0597579425113464e-05,
+      "loss": 0.029445352554321288,
+      "step": 2825
+    },
+    {
+      "epoch": 0.5065766086029151,
+      "grad_norm": 1.4334781169891357,
+      "learning_rate": 1.0503025718608171e-05,
+      "loss": 0.0277593731880188,
+      "step": 2850
+    },
+    {
+      "epoch": 0.5110202630643441,
+      "grad_norm": 1.4802097082138062,
+      "learning_rate": 1.0408472012102875e-05,
+      "loss": 0.029638910293579103,
+      "step": 2875
+    },
+    {
+      "epoch": 0.5154639175257731,
+      "grad_norm": 1.0358779430389404,
+      "learning_rate": 1.031391830559758e-05,
+      "loss": 0.021964311599731445,
+      "step": 2900
+    },
+    {
+      "epoch": 0.5199075719872023,
+      "grad_norm": 0.5717483758926392,
+      "learning_rate": 1.0219364599092286e-05,
+      "loss": 0.02695397138595581,
+      "step": 2925
+    },
+    {
+      "epoch": 0.5243512264486313,
+      "grad_norm": 0.5034074783325195,
+      "learning_rate": 1.012481089258699e-05,
+      "loss": 0.02091419219970703,
+      "step": 2950
+    },
+    {
+      "epoch": 0.5287948809100604,
+      "grad_norm": 1.093700647354126,
+      "learning_rate": 1.0030257186081695e-05,
+      "loss": 0.018702698945999144,
+      "step": 2975
+    },
+    {
+      "epoch": 0.5332385353714895,
+      "grad_norm": 0.5766699910163879,
+      "learning_rate": 9.935703479576401e-06,
+      "loss": 0.02352435827255249,
+      "step": 3000
+    },
+    {
+      "epoch": 0.5332385353714895,
+      "eval_accuracy": 0.903,
+      "eval_auroc": 0.992703737334544,
+      "eval_f1": 0.9120580235720762,
+      "eval_loss": 0.036512941122055054,
+      "eval_runtime": 40.4309,
+      "eval_samples_per_second": 49.467,
+      "eval_steps_per_second": 1.558,
+      "eval_tpr_at_fpr1": 0.852882703777336,
+      "eval_tpr_at_fpr5": 0.9582504970178927,
+      "step": 3000
+    },
+    {
+      "epoch": 0.5376821898329186,
+      "grad_norm": 1.6101889610290527,
+      "learning_rate": 9.841149773071105e-06,
+      "loss": 0.02831456422805786,
+      "step": 3025
+    },
+    {
+      "epoch": 0.5421258442943476,
+      "grad_norm": 1.7061760425567627,
+      "learning_rate": 9.74659606656581e-06,
+      "loss": 0.023692820072174072,
+      "step": 3050
+    },
+    {
+      "epoch": 0.5465694987557768,
+      "grad_norm": 1.293489933013916,
+      "learning_rate": 9.652042360060516e-06,
+      "loss": 0.021458499431610108,
+      "step": 3075
+    },
+    {
+      "epoch": 0.5510131532172058,
+      "grad_norm": 1.280171513557434,
+      "learning_rate": 9.55748865355522e-06,
+      "loss": 0.023303213119506835,
+      "step": 3100
+    },
+    {
+      "epoch": 0.5554568076786349,
+      "grad_norm": 1.2874751091003418,
+      "learning_rate": 9.462934947049925e-06,
+      "loss": 0.027433459758758546,
+      "step": 3125
+    },
+    {
+      "epoch": 0.559900462140064,
+      "grad_norm": 1.2265180349349976,
+      "learning_rate": 9.36838124054463e-06,
+      "loss": 0.02306551456451416,
+      "step": 3150
+    },
+    {
+      "epoch": 0.5643441166014931,
+      "grad_norm": 2.207395076751709,
+      "learning_rate": 9.273827534039335e-06,
+      "loss": 0.030330984592437743,
+      "step": 3175
+    },
+    {
+      "epoch": 0.5687877710629221,
+      "grad_norm": 0.700985312461853,
+      "learning_rate": 9.17927382753404e-06,
+      "loss": 0.02533963918685913,
+      "step": 3200
+    },
+    {
+      "epoch": 0.5732314255243512,
+      "grad_norm": 0.8443852663040161,
+      "learning_rate": 9.084720121028746e-06,
+      "loss": 0.029710006713867188,
+      "step": 3225
+    },
+    {
+      "epoch": 0.5776750799857803,
+      "grad_norm": 0.5237564444541931,
+      "learning_rate": 8.99016641452345e-06,
+      "loss": 0.02827130079269409,
+      "step": 3250
+    },
+    {
+      "epoch": 0.5821187344472094,
+      "grad_norm": 1.318710446357727,
+      "learning_rate": 8.895612708018155e-06,
+      "loss": 0.017649848461151123,
+      "step": 3275
+    },
+    {
+      "epoch": 0.5865623889086384,
+      "grad_norm": 2.1418726444244385,
+      "learning_rate": 8.80105900151286e-06,
+      "loss": 0.028039700984954834,
+      "step": 3300
+    },
+    {
+      "epoch": 0.5910060433700676,
+      "grad_norm": 0.6394239068031311,
+      "learning_rate": 8.706505295007565e-06,
+      "loss": 0.029724645614624023,
+      "step": 3325
+    },
+    {
+      "epoch": 0.5954496978314966,
+      "grad_norm": 0.44896772503852844,
+      "learning_rate": 8.61195158850227e-06,
+      "loss": 0.026093797683715822,
+      "step": 3350
+    },
+    {
+      "epoch": 0.5998933522929257,
+      "grad_norm": 2.3762757778167725,
+      "learning_rate": 8.517397881996974e-06,
+      "loss": 0.027712843418121337,
+      "step": 3375
+    },
+    {
+      "epoch": 0.6043370067543548,
+      "grad_norm": 1.4584051370620728,
+      "learning_rate": 8.42284417549168e-06,
+      "loss": 0.031196737289428712,
+      "step": 3400
+    },
+    {
+      "epoch": 0.6087806612157839,
+      "grad_norm": 2.2569475173950195,
+      "learning_rate": 8.328290468986385e-06,
+      "loss": 0.026621932983398437,
+      "step": 3425
+    },
+    {
+      "epoch": 0.6132243156772129,
+      "grad_norm": 1.9737194776535034,
+      "learning_rate": 8.23373676248109e-06,
+      "loss": 0.020163617134094237,
+      "step": 3450
+    },
+    {
+      "epoch": 0.617667970138642,
+      "grad_norm": 0.9083975553512573,
+      "learning_rate": 8.139183055975795e-06,
+      "loss": 0.0209149169921875,
+      "step": 3475
+    },
+    {
+      "epoch": 0.6221116246000711,
+      "grad_norm": 1.0563571453094482,
+      "learning_rate": 8.0446293494705e-06,
+      "loss": 0.025532805919647218,
+      "step": 3500
+    },
+    {
+      "epoch": 0.6221116246000711,
+      "eval_accuracy": 0.8925,
+      "eval_auroc": 0.9965268749674989,
+      "eval_f1": 0.9033707865168539,
+      "eval_loss": 0.029821457341313362,
+      "eval_runtime": 40.7571,
+      "eval_samples_per_second": 49.071,
+      "eval_steps_per_second": 1.546,
+      "eval_tpr_at_fpr1": 0.9254473161033797,
+      "eval_tpr_at_fpr5": 0.9850894632206759,
+      "step": 3500
+    },
+    {
+      "epoch": 0.6265552790615002,
+      "grad_norm": 1.6384830474853516,
+      "learning_rate": 7.950075642965204e-06,
+      "loss": 0.018657710552215576,
+      "step": 3525
+    },
+    {
+      "epoch": 0.6309989335229292,
+      "grad_norm": 1.4129881858825684,
+      "learning_rate": 7.85552193645991e-06,
+      "loss": 0.027512576580047608,
+      "step": 3550
+    },
+    {
+      "epoch": 0.6354425879843584,
+      "grad_norm": 1.2471665143966675,
+      "learning_rate": 7.760968229954615e-06,
+      "loss": 0.029382569789886473,
+      "step": 3575
+    },
+    {
+      "epoch": 0.6398862424457874,
+      "grad_norm": 1.1254513263702393,
+      "learning_rate": 7.66641452344932e-06,
+      "loss": 0.023775274753570556,
+      "step": 3600
+    },
+    {
+      "epoch": 0.6443298969072165,
+      "grad_norm": 0.9185925126075745,
+      "learning_rate": 7.571860816944025e-06,
+      "loss": 0.025625219345092775,
+      "step": 3625
+    },
+    {
+      "epoch": 0.6487735513686456,
+      "grad_norm": 0.9741719961166382,
+      "learning_rate": 7.477307110438729e-06,
+      "loss": 0.014400173425674439,
+      "step": 3650
+    },
+    {
+      "epoch": 0.6532172058300747,
+      "grad_norm": 1.5722410678863525,
+      "learning_rate": 7.382753403933435e-06,
+      "loss": 0.016961036920547484,
+      "step": 3675
+    },
+    {
+      "epoch": 0.6576608602915037,
+      "grad_norm": 1.0956284999847412,
+      "learning_rate": 7.28819969742814e-06,
+      "loss": 0.029399728775024413,
+      "step": 3700
+    },
+    {
+      "epoch": 0.6621045147529329,
+      "grad_norm": 1.8072013854980469,
+      "learning_rate": 7.193645990922845e-06,
+      "loss": 0.02603915214538574,
+      "step": 3725
+    },
+    {
+      "epoch": 0.6665481692143619,
+      "grad_norm": 1.4998871088027954,
+      "learning_rate": 7.09909228441755e-06,
+      "loss": 0.018154734373092653,
+      "step": 3750
+    },
+    {
+      "epoch": 0.670991823675791,
+      "grad_norm": 1.015345573425293,
+      "learning_rate": 7.004538577912255e-06,
+      "loss": 0.020612461566925047,
+      "step": 3775
+    },
+    {
+      "epoch": 0.67543547813722,
+      "grad_norm": 0.518636167049408,
+      "learning_rate": 6.909984871406959e-06,
+      "loss": 0.020666675567626955,
+      "step": 3800
+    },
+    {
+      "epoch": 0.6798791325986492,
+      "grad_norm": 1.4760479927062988,
+      "learning_rate": 6.815431164901665e-06,
+      "loss": 0.022126734256744385,
+      "step": 3825
+    },
+    {
+      "epoch": 0.6843227870600782,
+      "grad_norm": 0.5096405744552612,
+      "learning_rate": 6.7208774583963696e-06,
+      "loss": 0.023763720989227296,
+      "step": 3850
+    },
+    {
+      "epoch": 0.6887664415215072,
+      "grad_norm": 0.7516443133354187,
+      "learning_rate": 6.626323751891075e-06,
+      "loss": 0.023717043399810792,
+      "step": 3875
+    },
+    {
+      "epoch": 0.6932100959829364,
+      "grad_norm": 0.8385019898414612,
+      "learning_rate": 6.53177004538578e-06,
+      "loss": 0.021878042221069337,
+      "step": 3900
+    },
+    {
+      "epoch": 0.6976537504443654,
+      "grad_norm": 1.1693350076675415,
+      "learning_rate": 6.4372163388804845e-06,
+      "loss": 0.014371514320373535,
+      "step": 3925
+    },
+    {
+      "epoch": 0.7020974049057945,
+      "grad_norm": 1.4496546983718872,
+      "learning_rate": 6.342662632375189e-06,
+      "loss": 0.021327991485595704,
+      "step": 3950
+    },
+    {
+      "epoch": 0.7065410593672236,
+      "grad_norm": 1.0142734050750732,
+      "learning_rate": 6.248108925869895e-06,
+      "loss": 0.023486480712890626,
+      "step": 3975
+    },
+    {
+      "epoch": 0.7109847138286527,
+      "grad_norm": 0.4203350245952606,
+      "learning_rate": 6.1535552193645995e-06,
+      "loss": 0.023264715671539305,
+      "step": 4000
+    },
+    {
+      "epoch": 0.7109847138286527,
+      "eval_accuracy": 0.9335,
+      "eval_auroc": 0.9970688944802013,
+      "eval_f1": 0.9379374708352777,
+      "eval_loss": 0.02452407218515873,
+      "eval_runtime": 40.1446,
+      "eval_samples_per_second": 49.82,
+      "eval_steps_per_second": 1.569,
+      "eval_tpr_at_fpr1": 0.9264413518886679,
+      "eval_tpr_at_fpr5": 0.9850894632206759,
+      "step": 4000
+    },
+    {
+      "epoch": 0.7154283682900817,
+      "grad_norm": 0.6930143237113953,
+      "learning_rate": 6.059001512859305e-06,
+      "loss": 0.01856299042701721,
+      "step": 4025
+    },
+    {
+      "epoch": 0.7198720227515109,
+      "grad_norm": 1.0674962997436523,
+      "learning_rate": 5.96444780635401e-06,
+      "loss": 0.023626606464385986,
+      "step": 4050
+    },
+    {
+      "epoch": 0.7243156772129399,
+      "grad_norm": 0.6356366276741028,
+      "learning_rate": 5.8698940998487145e-06,
+      "loss": 0.023326983451843263,
+      "step": 4075
+    },
+    {
+      "epoch": 0.728759331674369,
+      "grad_norm": 0.8227376937866211,
+      "learning_rate": 5.775340393343419e-06,
+      "loss": 0.02331566095352173,
+      "step": 4100
+    },
+    {
+      "epoch": 0.733202986135798,
+      "grad_norm": 2.189657211303711,
+      "learning_rate": 5.680786686838125e-06,
+      "loss": 0.01987994074821472,
+      "step": 4125
+    },
+    {
+      "epoch": 0.7376466405972272,
+      "grad_norm": 0.46455055475234985,
+      "learning_rate": 5.5862329803328295e-06,
+      "loss": 0.01780161142349243,
+      "step": 4150
+    },
+    {
+      "epoch": 0.7420902950586562,
+      "grad_norm": 0.7525627017021179,
+      "learning_rate": 5.491679273827535e-06,
+      "loss": 0.02872683048248291,
+      "step": 4175
+    },
+    {
+      "epoch": 0.7465339495200853,
+      "grad_norm": 0.9939025640487671,
+      "learning_rate": 5.39712556732224e-06,
+      "loss": 0.021651785373687744,
+      "step": 4200
+    },
+    {
+      "epoch": 0.7509776039815144,
+      "grad_norm": 0.5748035907745361,
+      "learning_rate": 5.3025718608169445e-06,
+      "loss": 0.01857919096946716,
+      "step": 4225
+    },
+    {
+      "epoch": 0.7554212584429435,
+      "grad_norm": 1.1377756595611572,
+      "learning_rate": 5.208018154311649e-06,
+      "loss": 0.021290059089660644,
+      "step": 4250
+    },
+    {
+      "epoch": 0.7598649129043725,
+      "grad_norm": 1.592410683631897,
+      "learning_rate": 5.113464447806355e-06,
+      "loss": 0.01949896812438965,
+      "step": 4275
+    },
+    {
+      "epoch": 0.7643085673658017,
+      "grad_norm": 1.3217352628707886,
+      "learning_rate": 5.0189107413010595e-06,
+      "loss": 0.024791300296783447,
+      "step": 4300
+    },
+    {
+      "epoch": 0.7687522218272307,
+      "grad_norm": 0.3922988474369049,
+      "learning_rate": 4.924357034795764e-06,
+      "loss": 0.021808433532714843,
+      "step": 4325
+    },
+    {
+      "epoch": 0.7731958762886598,
+      "grad_norm": 0.45381656289100647,
+      "learning_rate": 4.82980332829047e-06,
+      "loss": 0.020455398559570313,
+      "step": 4350
+    },
+    {
+      "epoch": 0.7776395307500888,
+      "grad_norm": 0.8540909886360168,
+      "learning_rate": 4.7352496217851745e-06,
+      "loss": 0.023225700855255126,
+      "step": 4375
+    },
+    {
+      "epoch": 0.782083185211518,
+      "grad_norm": 2.9069783687591553,
+      "learning_rate": 4.640695915279879e-06,
+      "loss": 0.023292510509490966,
+      "step": 4400
+    },
+    {
+      "epoch": 0.786526839672947,
+      "grad_norm": 0.9787670969963074,
+      "learning_rate": 4.546142208774585e-06,
+      "loss": 0.024890389442443848,
+      "step": 4425
+    },
+    {
+      "epoch": 0.7909704941343761,
+      "grad_norm": 1.0303661823272705,
+      "learning_rate": 4.4515885022692894e-06,
+      "loss": 0.023072149753570557,
+      "step": 4450
+    },
+    {
+      "epoch": 0.7954141485958052,
+      "grad_norm": 2.1931862831115723,
+      "learning_rate": 4.357034795763994e-06,
+      "loss": 0.032431015968322756,
+      "step": 4475
+    },
+    {
+      "epoch": 0.7998578030572343,
+      "grad_norm": 0.7739485502243042,
+      "learning_rate": 4.2624810892587e-06,
+      "loss": 0.020095715522766112,
+      "step": 4500
+    },
+    {
+      "epoch": 0.7998578030572343,
+      "eval_accuracy": 0.9275,
+      "eval_auroc": 0.9970798948762156,
+      "eval_f1": 0.9327146171693736,
+      "eval_loss": 0.024173183366656303,
+      "eval_runtime": 39.1522,
+      "eval_samples_per_second": 51.083,
+      "eval_steps_per_second": 1.609,
+      "eval_tpr_at_fpr1": 0.9224652087475149,
+      "eval_tpr_at_fpr5": 0.9900596421471173,
+      "step": 4500
+    },
+    {
+      "epoch": 0.8043014575186633,
+      "grad_norm": 1.5598735809326172,
+      "learning_rate": 4.167927382753404e-06,
+      "loss": 0.024987099170684816,
+      "step": 4525
+    },
+    {
+      "epoch": 0.8087451119800925,
+      "grad_norm": 1.1426900625228882,
+      "learning_rate": 4.073373676248109e-06,
+      "loss": 0.01934351325035095,
+      "step": 4550
+    },
+    {
+      "epoch": 0.8131887664415215,
+      "grad_norm": 0.3795163333415985,
+      "learning_rate": 3.978819969742814e-06,
+      "loss": 0.020940425395965575,
+      "step": 4575
+    },
+    {
+      "epoch": 0.8176324209029506,
+      "grad_norm": 1.1596218347549438,
+      "learning_rate": 3.884266263237519e-06,
+      "loss": 0.027658913135528564,
+      "step": 4600
+    },
+    {
+      "epoch": 0.8220760753643797,
+      "grad_norm": 1.05118989944458,
+      "learning_rate": 3.789712556732224e-06,
+      "loss": 0.016726157665252685,
+      "step": 4625
+    },
+    {
+      "epoch": 0.8265197298258088,
+      "grad_norm": 0.994926393032074,
+      "learning_rate": 3.6951588502269293e-06,
+      "loss": 0.013396300077438354,
+      "step": 4650
+    },
+    {
+      "epoch": 0.8309633842872378,
+      "grad_norm": 2.416964054107666,
+      "learning_rate": 3.6006051437216344e-06,
+      "loss": 0.02318723201751709,
+      "step": 4675
+    },
+    {
+      "epoch": 0.835407038748667,
+      "grad_norm": 2.359633445739746,
+      "learning_rate": 3.506051437216339e-06,
+      "loss": 0.02345597982406616,
+      "step": 4700
+    },
+    {
+      "epoch": 0.839850693210096,
+      "grad_norm": 1.0586191415786743,
+      "learning_rate": 3.4114977307110442e-06,
+      "loss": 0.021095492839813233,
+      "step": 4725
+    },
+    {
+      "epoch": 0.8442943476715251,
+      "grad_norm": 1.2005938291549683,
+      "learning_rate": 3.3169440242057494e-06,
+      "loss": 0.023975539207458495,
+      "step": 4750
+    },
+    {
+      "epoch": 0.8487380021329541,
+      "grad_norm": 0.22911959886550903,
+      "learning_rate": 3.222390317700454e-06,
+      "loss": 0.019334245920181275,
+      "step": 4775
+    },
+    {
+      "epoch": 0.8531816565943833,
+      "grad_norm": 1.3965319395065308,
+      "learning_rate": 3.1278366111951592e-06,
+      "loss": 0.027639262676239014,
+      "step": 4800
+    },
+    {
+      "epoch": 0.8576253110558123,
+      "grad_norm": 0.15118920803070068,
+      "learning_rate": 3.0332829046898644e-06,
+      "loss": 0.020000927448272705,
+      "step": 4825
+    },
+    {
+      "epoch": 0.8620689655172413,
+      "grad_norm": 1.7333295345306396,
+      "learning_rate": 2.938729198184569e-06,
+      "loss": 0.02153873920440674,
+      "step": 4850
+    },
+    {
+      "epoch": 0.8665126199786705,
+      "grad_norm": 0.2823106348514557,
+      "learning_rate": 2.844175491679274e-06,
+      "loss": 0.014718363285064697,
+      "step": 4875
+    },
+    {
+      "epoch": 0.8709562744400995,
+      "grad_norm": 0.735140323638916,
+      "learning_rate": 2.7496217851739793e-06,
+      "loss": 0.01869586229324341,
+      "step": 4900
+    },
+    {
+      "epoch": 0.8753999289015286,
+      "grad_norm": 0.4756013751029968,
+      "learning_rate": 2.655068078668684e-06,
+      "loss": 0.01766459345817566,
+      "step": 4925
+    },
+    {
+      "epoch": 0.8798435833629576,
+      "grad_norm": 1.9793126583099365,
+      "learning_rate": 2.560514372163389e-06,
+      "loss": 0.01844774007797241,
+      "step": 4950
+    },
+    {
+      "epoch": 0.8842872378243868,
+      "grad_norm": 0.3258880376815796,
+      "learning_rate": 2.465960665658094e-06,
+      "loss": 0.01422677755355835,
+      "step": 4975
+    },
+    {
+      "epoch": 0.8887308922858158,
+      "grad_norm": 0.9487712383270264,
+      "learning_rate": 2.371406959152799e-06,
+      "loss": 0.016807562112808226,
+      "step": 5000
+    },
+    {
+      "epoch": 0.8887308922858158,
+      "eval_accuracy": 0.9295,
+      "eval_auroc": 0.9969978919241093,
+      "eval_f1": 0.9344490934449095,
+      "eval_loss": 0.024602515622973442,
+      "eval_runtime": 39.8771,
+      "eval_samples_per_second": 50.154,
+      "eval_steps_per_second": 1.58,
+      "eval_tpr_at_fpr1": 0.889662027833002,
+      "eval_tpr_at_fpr5": 0.9910536779324056,
+      "step": 5000
+    },
+    {
+      "epoch": 0.893174546747245,
+      "grad_norm": 1.8331549167633057,
+      "learning_rate": 2.276853252647504e-06,
+      "loss": 0.019318313598632814,
+      "step": 5025
+    },
+    {
+      "epoch": 0.897618201208674,
+      "grad_norm": 0.5335781574249268,
+      "learning_rate": 2.182299546142209e-06,
+      "loss": 0.016727542877197264,
+      "step": 5050
+    },
+    {
+      "epoch": 0.9020618556701031,
+      "grad_norm": 2.0710813999176025,
+      "learning_rate": 2.087745839636914e-06,
+      "loss": 0.024016971588134765,
+      "step": 5075
+    },
+    {
+      "epoch": 0.9065055101315321,
+      "grad_norm": 0.5005258321762085,
+      "learning_rate": 1.993192133131619e-06,
+      "loss": 0.023308300971984865,
+      "step": 5100
+    },
+    {
+      "epoch": 0.9109491645929613,
+      "grad_norm": 0.8444198369979858,
+      "learning_rate": 1.8986384266263239e-06,
+      "loss": 0.013868091106414794,
+      "step": 5125
+    },
+    {
+      "epoch": 0.9153928190543903,
+      "grad_norm": 1.0288333892822266,
+      "learning_rate": 1.8040847201210288e-06,
+      "loss": 0.02429831266403198,
+      "step": 5150
+    },
+    {
+      "epoch": 0.9198364735158194,
+      "grad_norm": 0.5995722413063049,
+      "learning_rate": 1.709531013615734e-06,
+      "loss": 0.020702524185180662,
+      "step": 5175
+    },
+    {
+      "epoch": 0.9242801279772485,
+      "grad_norm": 1.5560880899429321,
+      "learning_rate": 1.6149773071104389e-06,
+      "loss": 0.014829163551330566,
+      "step": 5200
+    },
+    {
+      "epoch": 0.9287237824386776,
+      "grad_norm": 1.271360993385315,
+      "learning_rate": 1.5204236006051438e-06,
+      "loss": 0.020302300453186036,
+      "step": 5225
+    },
+    {
+      "epoch": 0.9331674369001066,
+      "grad_norm": 2.036619186401367,
+      "learning_rate": 1.425869894099849e-06,
+      "loss": 0.023549365997314452,
+      "step": 5250
+    },
+    {
+      "epoch": 0.9376110913615358,
+      "grad_norm": 1.7285027503967285,
+      "learning_rate": 1.3313161875945538e-06,
+      "loss": 0.02568220853805542,
+      "step": 5275
+    },
+    {
+      "epoch": 0.9420547458229648,
+      "grad_norm": 1.4646673202514648,
+      "learning_rate": 1.2367624810892588e-06,
+      "loss": 0.030799252986907957,
+      "step": 5300
+    },
+    {
+      "epoch": 0.9464984002843939,
+      "grad_norm": 0.6867812871932983,
+      "learning_rate": 1.142208774583964e-06,
+      "loss": 0.018717833757400514,
+      "step": 5325
+    },
+    {
+      "epoch": 0.9509420547458229,
+      "grad_norm": 0.2627001702785492,
+      "learning_rate": 1.0476550680786688e-06,
+      "loss": 0.01653684616088867,
+      "step": 5350
+    },
+    {
+      "epoch": 0.9553857092072521,
+      "grad_norm": 0.30518868565559387,
+      "learning_rate": 9.531013615733737e-07,
+      "loss": 0.022223813533782957,
+      "step": 5375
+    },
+    {
+      "epoch": 0.9598293636686811,
+      "grad_norm": 0.5545350909233093,
+      "learning_rate": 8.585476550680788e-07,
+      "loss": 0.018215081691741943,
+      "step": 5400
+    },
+    {
+      "epoch": 0.9642730181301102,
+      "grad_norm": 0.304283082485199,
+      "learning_rate": 7.639939485627837e-07,
+      "loss": 0.019889332056045532,
+      "step": 5425
+    },
+    {
+      "epoch": 0.9687166725915393,
+      "grad_norm": 1.052090048789978,
+      "learning_rate": 6.694402420574887e-07,
+      "loss": 0.017396693229675294,
+      "step": 5450
+    },
+    {
+      "epoch": 0.9731603270529684,
+      "grad_norm": 0.1891939640045166,
+      "learning_rate": 5.748865355521937e-07,
+      "loss": 0.019253385066986085,
+      "step": 5475
+    },
+    {
+      "epoch": 0.9776039815143974,
+      "grad_norm": 0.5522451996803284,
+      "learning_rate": 4.803328290468987e-07,
+      "loss": 0.017252475023269653,
+      "step": 5500
+    },
+    {
+      "epoch": 0.9776039815143974,
+      "eval_accuracy": 0.93,
+      "eval_auroc": 0.9973359040925472,
+      "eval_f1": 0.9349442379182157,
+      "eval_loss": 0.024247920140624046,
+      "eval_runtime": 38.3653,
+      "eval_samples_per_second": 52.13,
+      "eval_steps_per_second": 1.642,
+      "eval_tpr_at_fpr1": 0.9055666003976143,
+      "eval_tpr_at_fpr5": 0.9880715705765407,
+      "step": 5500
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 5626,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-5500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b726f050f5029e1ef25800ffb43c1e5bcf5df8fde670427401e6bad8b3522c9c
+size 5329

checkpoint-5626/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8865a48d69b743be81bf98afb9d729976c955f40269b3b87be5ebeeead6b1d9b
+size 736795940

checkpoint-5626/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2aafb3d4e70e97e271bcfaafa4b2a63a7a706d7fa375e595ef3c1028febc5937
+size 1473711115

checkpoint-5626/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b1c70b20302e039ff922ef92da23103cbd68279d464265f819dc67cd09814988
+size 14391

checkpoint-5626/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d3c5c2fe67c6e02d1952ca318cead603068a46c59b4564bb99394113f7a5048
+size 1529

checkpoint-5626/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1774 @@

+{
+  "best_global_step": 5626,
+  "best_metric": 0.9973979063246277,
+  "best_model_checkpoint": "/Users/anudit/Documents/GitHub/slopdetector/checkpoints/deberta-featattn-20260623-225422/checkpoint-5626",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 5626,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.004443654461429079,
+      "grad_norm": 3.013444662094116,
+      "learning_rate": 1.4201183431952664e-06,
+      "loss": 0.5004017639160157,
+      "step": 25
+    },
+    {
+      "epoch": 0.008887308922858158,
+      "grad_norm": 2.297563076019287,
+      "learning_rate": 2.8994082840236688e-06,
+      "loss": 0.3888048934936523,
+      "step": 50
+    },
+    {
+      "epoch": 0.013330963384287239,
+      "grad_norm": 1.134047508239746,
+      "learning_rate": 4.3786982248520715e-06,
+      "loss": 0.2801882553100586,
+      "step": 75
+    },
+    {
+      "epoch": 0.017774617845716316,
+      "grad_norm": 0.9834569096565247,
+      "learning_rate": 5.857988165680474e-06,
+      "loss": 0.24047454833984375,
+      "step": 100
+    },
+    {
+      "epoch": 0.022218272307145397,
+      "grad_norm": 1.117211103439331,
+      "learning_rate": 7.337278106508876e-06,
+      "loss": 0.2291146469116211,
+      "step": 125
+    },
+    {
+      "epoch": 0.026661926768574477,
+      "grad_norm": 0.8506172895431519,
+      "learning_rate": 8.816568047337279e-06,
+      "loss": 0.1967698097229004,
+      "step": 150
+    },
+    {
+      "epoch": 0.031105581230003555,
+      "grad_norm": 0.7021474242210388,
+      "learning_rate": 1.029585798816568e-05,
+      "loss": 0.17146373748779298,
+      "step": 175
+    },
+    {
+      "epoch": 0.03554923569143263,
+      "grad_norm": 1.2111107110977173,
+      "learning_rate": 1.1775147928994083e-05,
+      "loss": 0.13825268745422364,
+      "step": 200
+    },
+    {
+      "epoch": 0.03999289015286171,
+      "grad_norm": 1.9403120279312134,
+      "learning_rate": 1.3254437869822488e-05,
+      "loss": 0.12618659019470216,
+      "step": 225
+    },
+    {
+      "epoch": 0.04443654461429079,
+      "grad_norm": 1.8931593894958496,
+      "learning_rate": 1.4733727810650888e-05,
+      "loss": 0.10122986793518067,
+      "step": 250
+    },
+    {
+      "epoch": 0.048880199075719874,
+      "grad_norm": 1.8619073629379272,
+      "learning_rate": 1.621301775147929e-05,
+      "loss": 0.07919074535369873,
+      "step": 275
+    },
+    {
+      "epoch": 0.053323853537148955,
+      "grad_norm": 1.9105793237686157,
+      "learning_rate": 1.7692307692307694e-05,
+      "loss": 0.08386680603027344,
+      "step": 300
+    },
+    {
+      "epoch": 0.05776750799857803,
+      "grad_norm": 2.0332772731781006,
+      "learning_rate": 1.9171597633136098e-05,
+      "loss": 0.08702397346496582,
+      "step": 325
+    },
+    {
+      "epoch": 0.06221116246000711,
+      "grad_norm": 0.9020377993583679,
+      "learning_rate": 1.995839636913767e-05,
+      "loss": 0.06874918460845947,
+      "step": 350
+    },
+    {
+      "epoch": 0.06665481692143618,
+      "grad_norm": 1.6216212511062622,
+      "learning_rate": 1.9863842662632376e-05,
+      "loss": 0.06805606842041016,
+      "step": 375
+    },
+    {
+      "epoch": 0.07109847138286526,
+      "grad_norm": 1.7693337202072144,
+      "learning_rate": 1.9769288956127082e-05,
+      "loss": 0.06352178573608398,
+      "step": 400
+    },
+    {
+      "epoch": 0.07554212584429434,
+      "grad_norm": 1.6724389791488647,
+      "learning_rate": 1.9674735249621784e-05,
+      "loss": 0.0673055648803711,
+      "step": 425
+    },
+    {
+      "epoch": 0.07998578030572343,
+      "grad_norm": 0.5278561115264893,
+      "learning_rate": 1.9580181543116493e-05,
+      "loss": 0.06466075897216797,
+      "step": 450
+    },
+    {
+      "epoch": 0.0844294347671525,
+      "grad_norm": 1.7042737007141113,
+      "learning_rate": 1.9485627836611195e-05,
+      "loss": 0.0630407428741455,
+      "step": 475
+    },
+    {
+      "epoch": 0.08887308922858159,
+      "grad_norm": 0.3513544797897339,
+      "learning_rate": 1.93910741301059e-05,
+      "loss": 0.062327189445495604,
+      "step": 500
+    },
+    {
+      "epoch": 0.08887308922858159,
+      "eval_accuracy": 0.848,
+      "eval_auroc": 0.9866655199587185,
+      "eval_f1": 0.8685121107266436,
+      "eval_loss": 0.054977674037218094,
+      "eval_runtime": 39.8938,
+      "eval_samples_per_second": 50.133,
+      "eval_steps_per_second": 1.579,
+      "eval_tpr_at_fpr1": 0.7455268389662028,
+      "eval_tpr_at_fpr5": 0.937375745526839,
+      "step": 500
+    },
+    {
+      "epoch": 0.09331674369001067,
+      "grad_norm": 2.1725878715515137,
+      "learning_rate": 1.9296520423600606e-05,
+      "loss": 0.05081462860107422,
+      "step": 525
+    },
+    {
+      "epoch": 0.09776039815143975,
+      "grad_norm": 2.587542772293091,
+      "learning_rate": 1.9201966717095312e-05,
+      "loss": 0.06098108291625977,
+      "step": 550
+    },
+    {
+      "epoch": 0.10220405261286883,
+      "grad_norm": 1.1815265417099,
+      "learning_rate": 1.9107413010590018e-05,
+      "loss": 0.04866991996765137,
+      "step": 575
+    },
+    {
+      "epoch": 0.10664770707429791,
+      "grad_norm": 1.140872597694397,
+      "learning_rate": 1.901285930408472e-05,
+      "loss": 0.06058640956878662,
+      "step": 600
+    },
+    {
+      "epoch": 0.11109136153572698,
+      "grad_norm": 1.164772868156433,
+      "learning_rate": 1.891830559757943e-05,
+      "loss": 0.0485923957824707,
+      "step": 625
+    },
+    {
+      "epoch": 0.11553501599715606,
+      "grad_norm": 2.076003074645996,
+      "learning_rate": 1.882375189107413e-05,
+      "loss": 0.05198529243469238,
+      "step": 650
+    },
+    {
+      "epoch": 0.11997867045858514,
+      "grad_norm": 2.8677966594696045,
+      "learning_rate": 1.8729198184568836e-05,
+      "loss": 0.05215679168701172,
+      "step": 675
+    },
+    {
+      "epoch": 0.12442232492001422,
+      "grad_norm": 1.243391752243042,
+      "learning_rate": 1.8634644478063542e-05,
+      "loss": 0.046974472999572754,
+      "step": 700
+    },
+    {
+      "epoch": 0.12886597938144329,
+      "grad_norm": 1.970794916152954,
+      "learning_rate": 1.8540090771558244e-05,
+      "loss": 0.051630439758300783,
+      "step": 725
+    },
+    {
+      "epoch": 0.13330963384287237,
+      "grad_norm": 1.031387448310852,
+      "learning_rate": 1.8445537065052953e-05,
+      "loss": 0.04577981948852539,
+      "step": 750
+    },
+    {
+      "epoch": 0.13775328830430145,
+      "grad_norm": 1.4441957473754883,
+      "learning_rate": 1.8350983358547655e-05,
+      "loss": 0.05677220821380615,
+      "step": 775
+    },
+    {
+      "epoch": 0.14219694276573053,
+      "grad_norm": 1.2302734851837158,
+      "learning_rate": 1.825642965204236e-05,
+      "loss": 0.043911681175231934,
+      "step": 800
+    },
+    {
+      "epoch": 0.1466405972271596,
+      "grad_norm": 0.9389927983283997,
+      "learning_rate": 1.8161875945537066e-05,
+      "loss": 0.04272346019744873,
+      "step": 825
+    },
+    {
+      "epoch": 0.1510842516885887,
+      "grad_norm": 1.342290997505188,
+      "learning_rate": 1.8067322239031772e-05,
+      "loss": 0.054392943382263186,
+      "step": 850
+    },
+    {
+      "epoch": 0.15552790615001777,
+      "grad_norm": 2.6409666538238525,
+      "learning_rate": 1.7972768532526477e-05,
+      "loss": 0.04197061061859131,
+      "step": 875
+    },
+    {
+      "epoch": 0.15997156061144685,
+      "grad_norm": 1.1038918495178223,
+      "learning_rate": 1.787821482602118e-05,
+      "loss": 0.03587212562561035,
+      "step": 900
+    },
+    {
+      "epoch": 0.16441521507287593,
+      "grad_norm": 1.414070725440979,
+      "learning_rate": 1.778366111951589e-05,
+      "loss": 0.04636185646057129,
+      "step": 925
+    },
+    {
+      "epoch": 0.168858869534305,
+      "grad_norm": 2.164773941040039,
+      "learning_rate": 1.768910741301059e-05,
+      "loss": 0.044623188972473145,
+      "step": 950
+    },
+    {
+      "epoch": 0.1733025239957341,
+      "grad_norm": 2.06410813331604,
+      "learning_rate": 1.7594553706505296e-05,
+      "loss": 0.038699045181274414,
+      "step": 975
+    },
+    {
+      "epoch": 0.17774617845716317,
+      "grad_norm": 1.53926420211792,
+      "learning_rate": 1.7500000000000002e-05,
+      "loss": 0.038100283145904544,
+      "step": 1000
+    },
+    {
+      "epoch": 0.17774617845716317,
+      "eval_accuracy": 0.89,
+      "eval_auroc": 0.9912356844846415,
+      "eval_f1": 0.9012567324955117,
+      "eval_loss": 0.042472898960113525,
+      "eval_runtime": 38.57,
+      "eval_samples_per_second": 51.854,
+      "eval_steps_per_second": 1.633,
+      "eval_tpr_at_fpr1": 0.8230616302186878,
+      "eval_tpr_at_fpr5": 0.952286282306163,
+      "step": 1000
+    },
+    {
+      "epoch": 0.18218983291859225,
+      "grad_norm": 0.9646220803260803,
+      "learning_rate": 1.7405446293494704e-05,
+      "loss": 0.03173836708068847,
+      "step": 1025
+    },
+    {
+      "epoch": 0.18663348738002133,
+      "grad_norm": 1.0528196096420288,
+      "learning_rate": 1.7310892586989413e-05,
+      "loss": 0.03988344669342041,
+      "step": 1050
+    },
+    {
+      "epoch": 0.19107714184145042,
+      "grad_norm": 1.5726221799850464,
+      "learning_rate": 1.7216338880484115e-05,
+      "loss": 0.044674863815307615,
+      "step": 1075
+    },
+    {
+      "epoch": 0.1955207963028795,
+      "grad_norm": 1.551660418510437,
+      "learning_rate": 1.712178517397882e-05,
+      "loss": 0.040711288452148435,
+      "step": 1100
+    },
+    {
+      "epoch": 0.19996445076430858,
+      "grad_norm": 1.090385913848877,
+      "learning_rate": 1.7027231467473526e-05,
+      "loss": 0.037872114181518556,
+      "step": 1125
+    },
+    {
+      "epoch": 0.20440810522573766,
+      "grad_norm": 1.1778202056884766,
+      "learning_rate": 1.6932677760968232e-05,
+      "loss": 0.039130420684814454,
+      "step": 1150
+    },
+    {
+      "epoch": 0.20885175968716674,
+      "grad_norm": 1.402064323425293,
+      "learning_rate": 1.6838124054462937e-05,
+      "loss": 0.03875999212265015,
+      "step": 1175
+    },
+    {
+      "epoch": 0.21329541414859582,
+      "grad_norm": 0.7979677319526672,
+      "learning_rate": 1.674357034795764e-05,
+      "loss": 0.033257806301116945,
+      "step": 1200
+    },
+    {
+      "epoch": 0.21773906861002487,
+      "grad_norm": 2.5630085468292236,
+      "learning_rate": 1.664901664145235e-05,
+      "loss": 0.03850275993347168,
+      "step": 1225
+    },
+    {
+      "epoch": 0.22218272307145395,
+      "grad_norm": 1.1255887746810913,
+      "learning_rate": 1.655446293494705e-05,
+      "loss": 0.035763952732086185,
+      "step": 1250
+    },
+    {
+      "epoch": 0.22662637753288303,
+      "grad_norm": 2.648975133895874,
+      "learning_rate": 1.6459909228441756e-05,
+      "loss": 0.04280531883239746,
+      "step": 1275
+    },
+    {
+      "epoch": 0.23107003199431211,
+      "grad_norm": 1.7409067153930664,
+      "learning_rate": 1.6365355521936462e-05,
+      "loss": 0.04235891819000244,
+      "step": 1300
+    },
+    {
+      "epoch": 0.2355136864557412,
+      "grad_norm": 1.5755038261413574,
+      "learning_rate": 1.6270801815431164e-05,
+      "loss": 0.04084760665893555,
+      "step": 1325
+    },
+    {
+      "epoch": 0.23995734091717028,
+      "grad_norm": 1.2442480325698853,
+      "learning_rate": 1.6176248108925873e-05,
+      "loss": 0.03942857027053833,
+      "step": 1350
+    },
+    {
+      "epoch": 0.24440099537859936,
+      "grad_norm": 0.7775816321372986,
+      "learning_rate": 1.6081694402420575e-05,
+      "loss": 0.039197320938110354,
+      "step": 1375
+    },
+    {
+      "epoch": 0.24884464984002844,
+      "grad_norm": 0.44854021072387695,
+      "learning_rate": 1.598714069591528e-05,
+      "loss": 0.030667483806610107,
+      "step": 1400
+    },
+    {
+      "epoch": 0.25328830430145755,
+      "grad_norm": 0.9631138443946838,
+      "learning_rate": 1.5892586989409986e-05,
+      "loss": 0.03460927009582519,
+      "step": 1425
+    },
+    {
+      "epoch": 0.25773195876288657,
+      "grad_norm": 0.8312052488327026,
+      "learning_rate": 1.5798033282904692e-05,
+      "loss": 0.03320029735565186,
+      "step": 1450
+    },
+    {
+      "epoch": 0.26217561322431565,
+      "grad_norm": 1.1160472631454468,
+      "learning_rate": 1.5703479576399397e-05,
+      "loss": 0.03198946952819824,
+      "step": 1475
+    },
+    {
+      "epoch": 0.26661926768574473,
+      "grad_norm": 1.6029430627822876,
+      "learning_rate": 1.56089258698941e-05,
+      "loss": 0.034000282287597654,
+      "step": 1500
+    },
+    {
+      "epoch": 0.26661926768574473,
+      "eval_accuracy": 0.8785,
+      "eval_auroc": 0.9909046725682125,
+      "eval_f1": 0.8921438082556591,
+      "eval_loss": 0.04817873612046242,
+      "eval_runtime": 44.1179,
+      "eval_samples_per_second": 45.333,
+      "eval_steps_per_second": 1.428,
+      "eval_tpr_at_fpr1": 0.8021868787276342,
+      "eval_tpr_at_fpr5": 0.9572564612326043,
+      "step": 1500
+    },
+    {
+      "epoch": 0.2710629221471738,
+      "grad_norm": 2.1138088703155518,
+      "learning_rate": 1.5514372163388805e-05,
+      "loss": 0.03080030918121338,
+      "step": 1525
+    },
+    {
+      "epoch": 0.2755065766086029,
+      "grad_norm": 2.206002950668335,
+      "learning_rate": 1.541981845688351e-05,
+      "loss": 0.04029146194458008,
+      "step": 1550
+    },
+    {
+      "epoch": 0.279950231070032,
+      "grad_norm": 1.8083148002624512,
+      "learning_rate": 1.5325264750378216e-05,
+      "loss": 0.028056590557098388,
+      "step": 1575
+    },
+    {
+      "epoch": 0.28439388553146105,
+      "grad_norm": 0.9714005589485168,
+      "learning_rate": 1.5230711043872922e-05,
+      "loss": 0.033678176403045657,
+      "step": 1600
+    },
+    {
+      "epoch": 0.28883753999289014,
+      "grad_norm": 0.29741403460502625,
+      "learning_rate": 1.5136157337367626e-05,
+      "loss": 0.03180084943771362,
+      "step": 1625
+    },
+    {
+      "epoch": 0.2932811944543192,
+      "grad_norm": 0.516327440738678,
+      "learning_rate": 1.5041603630862331e-05,
+      "loss": 0.03431586980819702,
+      "step": 1650
+    },
+    {
+      "epoch": 0.2977248489157483,
+      "grad_norm": 0.7245854735374451,
+      "learning_rate": 1.4947049924357035e-05,
+      "loss": 0.03058172941207886,
+      "step": 1675
+    },
+    {
+      "epoch": 0.3021685033771774,
+      "grad_norm": 2.4247725009918213,
+      "learning_rate": 1.4852496217851742e-05,
+      "loss": 0.04614161014556885,
+      "step": 1700
+    },
+    {
+      "epoch": 0.30661215783860646,
+      "grad_norm": 1.5023690462112427,
+      "learning_rate": 1.4757942511346446e-05,
+      "loss": 0.03227074861526489,
+      "step": 1725
+    },
+    {
+      "epoch": 0.31105581230003554,
+      "grad_norm": 0.5162255764007568,
+      "learning_rate": 1.466338880484115e-05,
+      "loss": 0.037947914600372314,
+      "step": 1750
+    },
+    {
+      "epoch": 0.3154994667614646,
+      "grad_norm": 0.8927256464958191,
+      "learning_rate": 1.4568835098335856e-05,
+      "loss": 0.04013613700866699,
+      "step": 1775
+    },
+    {
+      "epoch": 0.3199431212228937,
+      "grad_norm": 1.9027554988861084,
+      "learning_rate": 1.447428139183056e-05,
+      "loss": 0.04084087371826172,
+      "step": 1800
+    },
+    {
+      "epoch": 0.3243867756843228,
+      "grad_norm": 1.0974030494689941,
+      "learning_rate": 1.4379727685325267e-05,
+      "loss": 0.02415942192077637,
+      "step": 1825
+    },
+    {
+      "epoch": 0.32883043014575186,
+      "grad_norm": 1.2848249673843384,
+      "learning_rate": 1.428517397881997e-05,
+      "loss": 0.03221212863922119,
+      "step": 1850
+    },
+    {
+      "epoch": 0.33327408460718094,
+      "grad_norm": 0.8059474229812622,
+      "learning_rate": 1.4190620272314676e-05,
+      "loss": 0.037272207736968994,
+      "step": 1875
+    },
+    {
+      "epoch": 0.33771773906861,
+      "grad_norm": 1.0132513046264648,
+      "learning_rate": 1.4096066565809382e-05,
+      "loss": 0.029253509044647217,
+      "step": 1900
+    },
+    {
+      "epoch": 0.3421613935300391,
+      "grad_norm": 0.7545719742774963,
+      "learning_rate": 1.4001512859304086e-05,
+      "loss": 0.03997385501861572,
+      "step": 1925
+    },
+    {
+      "epoch": 0.3466050479914682,
+      "grad_norm": 0.37751272320747375,
+      "learning_rate": 1.3906959152798791e-05,
+      "loss": 0.027481729984283446,
+      "step": 1950
+    },
+    {
+      "epoch": 0.35104870245289727,
+      "grad_norm": 1.522934079170227,
+      "learning_rate": 1.3812405446293495e-05,
+      "loss": 0.028633484840393065,
+      "step": 1975
+    },
+    {
+      "epoch": 0.35549235691432635,
+      "grad_norm": 1.2354328632354736,
+      "learning_rate": 1.3717851739788202e-05,
+      "loss": 0.022525691986083986,
+      "step": 2000
+    },
+    {
+      "epoch": 0.35549235691432635,
+      "eval_accuracy": 0.936,
+      "eval_auroc": 0.9948458144493202,
+      "eval_f1": 0.9399624765478424,
+      "eval_loss": 0.030829520896077156,
+      "eval_runtime": 40.0972,
+      "eval_samples_per_second": 49.879,
+      "eval_steps_per_second": 1.571,
+      "eval_tpr_at_fpr1": 0.9125248508946322,
+      "eval_tpr_at_fpr5": 0.9781312127236581,
+      "step": 2000
+    },
+    {
+      "epoch": 0.3599360113757554,
+      "grad_norm": 1.0933098793029785,
+      "learning_rate": 1.3623298033282906e-05,
+      "loss": 0.03529852867126465,
+      "step": 2025
+    },
+    {
+      "epoch": 0.3643796658371845,
+      "grad_norm": 2.0476551055908203,
+      "learning_rate": 1.352874432677761e-05,
+      "loss": 0.03386972665786743,
+      "step": 2050
+    },
+    {
+      "epoch": 0.3688233202986136,
+      "grad_norm": 0.9298884868621826,
+      "learning_rate": 1.3434190620272315e-05,
+      "loss": 0.03486936330795288,
+      "step": 2075
+    },
+    {
+      "epoch": 0.37326697476004267,
+      "grad_norm": 0.7512989044189453,
+      "learning_rate": 1.333963691376702e-05,
+      "loss": 0.0267183780670166,
+      "step": 2100
+    },
+    {
+      "epoch": 0.37771062922147175,
+      "grad_norm": 1.4821196794509888,
+      "learning_rate": 1.3245083207261727e-05,
+      "loss": 0.021263403892517088,
+      "step": 2125
+    },
+    {
+      "epoch": 0.38215428368290083,
+      "grad_norm": 0.8745072484016418,
+      "learning_rate": 1.315052950075643e-05,
+      "loss": 0.0272180438041687,
+      "step": 2150
+    },
+    {
+      "epoch": 0.3865979381443299,
+      "grad_norm": 1.6741865873336792,
+      "learning_rate": 1.3055975794251136e-05,
+      "loss": 0.026980955600738526,
+      "step": 2175
+    },
+    {
+      "epoch": 0.391041592605759,
+      "grad_norm": 0.8200652599334717,
+      "learning_rate": 1.2961422087745842e-05,
+      "loss": 0.027142252922058106,
+      "step": 2200
+    },
+    {
+      "epoch": 0.3954852470671881,
+      "grad_norm": 1.5616494417190552,
+      "learning_rate": 1.2866868381240545e-05,
+      "loss": 0.030536642074584962,
+      "step": 2225
+    },
+    {
+      "epoch": 0.39992890152861715,
+      "grad_norm": 0.7505294680595398,
+      "learning_rate": 1.2772314674735251e-05,
+      "loss": 0.03007654905319214,
+      "step": 2250
+    },
+    {
+      "epoch": 0.40437255599004623,
+      "grad_norm": 0.3857294023036957,
+      "learning_rate": 1.2677760968229955e-05,
+      "loss": 0.02641111135482788,
+      "step": 2275
+    },
+    {
+      "epoch": 0.4088162104514753,
+      "grad_norm": 0.9879816174507141,
+      "learning_rate": 1.2583207261724662e-05,
+      "loss": 0.027107694149017335,
+      "step": 2300
+    },
+    {
+      "epoch": 0.4132598649129044,
+      "grad_norm": 0.5398420095443726,
+      "learning_rate": 1.2488653555219366e-05,
+      "loss": 0.027692139148712158,
+      "step": 2325
+    },
+    {
+      "epoch": 0.4177035193743335,
+      "grad_norm": 0.8365870118141174,
+      "learning_rate": 1.239409984871407e-05,
+      "loss": 0.029139807224273683,
+      "step": 2350
+    },
+    {
+      "epoch": 0.42214717383576256,
+      "grad_norm": 1.1654356718063354,
+      "learning_rate": 1.2299546142208775e-05,
+      "loss": 0.025624027252197267,
+      "step": 2375
+    },
+    {
+      "epoch": 0.42659082829719164,
+      "grad_norm": 0.8927724361419678,
+      "learning_rate": 1.220499243570348e-05,
+      "loss": 0.032838408946990964,
+      "step": 2400
+    },
+    {
+      "epoch": 0.43103448275862066,
+      "grad_norm": 1.7535399198532104,
+      "learning_rate": 1.2110438729198187e-05,
+      "loss": 0.03213581085205078,
+      "step": 2425
+    },
+    {
+      "epoch": 0.43547813722004974,
+      "grad_norm": 1.506422996520996,
+      "learning_rate": 1.201588502269289e-05,
+      "loss": 0.031965067386627195,
+      "step": 2450
+    },
+    {
+      "epoch": 0.4399217916814788,
+      "grad_norm": 1.933950424194336,
+      "learning_rate": 1.1921331316187596e-05,
+      "loss": 0.02685023784637451,
+      "step": 2475
+    },
+    {
+      "epoch": 0.4443654461429079,
+      "grad_norm": 0.8511770963668823,
+      "learning_rate": 1.18267776096823e-05,
+      "loss": 0.03357476472854614,
+      "step": 2500
+    },
+    {
+      "epoch": 0.4443654461429079,
+      "eval_accuracy": 0.9095,
+      "eval_auroc": 0.9952128276617958,
+      "eval_f1": 0.9173893199452305,
+      "eval_loss": 0.030984506011009216,
+      "eval_runtime": 40.8627,
+      "eval_samples_per_second": 48.944,
+      "eval_steps_per_second": 1.542,
+      "eval_tpr_at_fpr1": 0.856858846918489,
+      "eval_tpr_at_fpr5": 0.9821073558648111,
+      "step": 2500
+    },
+    {
+      "epoch": 0.448809100604337,
+      "grad_norm": 1.3411929607391357,
+      "learning_rate": 1.1732223903177005e-05,
+      "loss": 0.026271984577178956,
+      "step": 2525
+    },
+    {
+      "epoch": 0.45325275506576607,
+      "grad_norm": 1.3650128841400146,
+      "learning_rate": 1.1637670196671711e-05,
+      "loss": 0.030547237396240233,
+      "step": 2550
+    },
+    {
+      "epoch": 0.45769640952719515,
+      "grad_norm": 0.7035048007965088,
+      "learning_rate": 1.1543116490166415e-05,
+      "loss": 0.026542000770568848,
+      "step": 2575
+    },
+    {
+      "epoch": 0.46214006398862423,
+      "grad_norm": 1.3388855457305908,
+      "learning_rate": 1.1448562783661122e-05,
+      "loss": 0.024521036148071287,
+      "step": 2600
+    },
+    {
+      "epoch": 0.4665837184500533,
+      "grad_norm": 1.0085132122039795,
+      "learning_rate": 1.1354009077155826e-05,
+      "loss": 0.024952406883239745,
+      "step": 2625
+    },
+    {
+      "epoch": 0.4710273729114824,
+      "grad_norm": 0.30464261770248413,
+      "learning_rate": 1.125945537065053e-05,
+      "loss": 0.02288907766342163,
+      "step": 2650
+    },
+    {
+      "epoch": 0.47547102737291147,
+      "grad_norm": 0.6784248948097229,
+      "learning_rate": 1.1164901664145235e-05,
+      "loss": 0.024585678577423095,
+      "step": 2675
+    },
+    {
+      "epoch": 0.47991468183434055,
+      "grad_norm": 1.2737281322479248,
+      "learning_rate": 1.107034795763994e-05,
+      "loss": 0.028601126670837404,
+      "step": 2700
+    },
+    {
+      "epoch": 0.48435833629576963,
+      "grad_norm": 1.2060391902923584,
+      "learning_rate": 1.0975794251134646e-05,
+      "loss": 0.03009690284729004,
+      "step": 2725
+    },
+    {
+      "epoch": 0.4888019907571987,
+      "grad_norm": 0.9331129789352417,
+      "learning_rate": 1.088124054462935e-05,
+      "loss": 0.024897255897521973,
+      "step": 2750
+    },
+    {
+      "epoch": 0.4932456452186278,
+      "grad_norm": 0.7035834789276123,
+      "learning_rate": 1.0786686838124056e-05,
+      "loss": 0.029437661170959473,
+      "step": 2775
+    },
+    {
+      "epoch": 0.4976892996800569,
+      "grad_norm": 1.3447843790054321,
+      "learning_rate": 1.069213313161876e-05,
+      "loss": 0.024274458885192873,
+      "step": 2800
+    },
+    {
+      "epoch": 0.502132954141486,
+      "grad_norm": 0.7223392724990845,
+      "learning_rate": 1.0597579425113464e-05,
+      "loss": 0.029445352554321288,
+      "step": 2825
+    },
+    {
+      "epoch": 0.5065766086029151,
+      "grad_norm": 1.4334781169891357,
+      "learning_rate": 1.0503025718608171e-05,
+      "loss": 0.0277593731880188,
+      "step": 2850
+    },
+    {
+      "epoch": 0.5110202630643441,
+      "grad_norm": 1.4802097082138062,
+      "learning_rate": 1.0408472012102875e-05,
+      "loss": 0.029638910293579103,
+      "step": 2875
+    },
+    {
+      "epoch": 0.5154639175257731,
+      "grad_norm": 1.0358779430389404,
+      "learning_rate": 1.031391830559758e-05,
+      "loss": 0.021964311599731445,
+      "step": 2900
+    },
+    {
+      "epoch": 0.5199075719872023,
+      "grad_norm": 0.5717483758926392,
+      "learning_rate": 1.0219364599092286e-05,
+      "loss": 0.02695397138595581,
+      "step": 2925
+    },
+    {
+      "epoch": 0.5243512264486313,
+      "grad_norm": 0.5034074783325195,
+      "learning_rate": 1.012481089258699e-05,
+      "loss": 0.02091419219970703,
+      "step": 2950
+    },
+    {
+      "epoch": 0.5287948809100604,
+      "grad_norm": 1.093700647354126,
+      "learning_rate": 1.0030257186081695e-05,
+      "loss": 0.018702698945999144,
+      "step": 2975
+    },
+    {
+      "epoch": 0.5332385353714895,
+      "grad_norm": 0.5766699910163879,
+      "learning_rate": 9.935703479576401e-06,
+      "loss": 0.02352435827255249,
+      "step": 3000
+    },
+    {
+      "epoch": 0.5332385353714895,
+      "eval_accuracy": 0.903,
+      "eval_auroc": 0.992703737334544,
+      "eval_f1": 0.9120580235720762,
+      "eval_loss": 0.036512941122055054,
+      "eval_runtime": 40.4309,
+      "eval_samples_per_second": 49.467,
+      "eval_steps_per_second": 1.558,
+      "eval_tpr_at_fpr1": 0.852882703777336,
+      "eval_tpr_at_fpr5": 0.9582504970178927,
+      "step": 3000
+    },
+    {
+      "epoch": 0.5376821898329186,
+      "grad_norm": 1.6101889610290527,
+      "learning_rate": 9.841149773071105e-06,
+      "loss": 0.02831456422805786,
+      "step": 3025
+    },
+    {
+      "epoch": 0.5421258442943476,
+      "grad_norm": 1.7061760425567627,
+      "learning_rate": 9.74659606656581e-06,
+      "loss": 0.023692820072174072,
+      "step": 3050
+    },
+    {
+      "epoch": 0.5465694987557768,
+      "grad_norm": 1.293489933013916,
+      "learning_rate": 9.652042360060516e-06,
+      "loss": 0.021458499431610108,
+      "step": 3075
+    },
+    {
+      "epoch": 0.5510131532172058,
+      "grad_norm": 1.280171513557434,
+      "learning_rate": 9.55748865355522e-06,
+      "loss": 0.023303213119506835,
+      "step": 3100
+    },
+    {
+      "epoch": 0.5554568076786349,
+      "grad_norm": 1.2874751091003418,
+      "learning_rate": 9.462934947049925e-06,
+      "loss": 0.027433459758758546,
+      "step": 3125
+    },
+    {
+      "epoch": 0.559900462140064,
+      "grad_norm": 1.2265180349349976,
+      "learning_rate": 9.36838124054463e-06,
+      "loss": 0.02306551456451416,
+      "step": 3150
+    },
+    {
+      "epoch": 0.5643441166014931,
+      "grad_norm": 2.207395076751709,
+      "learning_rate": 9.273827534039335e-06,
+      "loss": 0.030330984592437743,
+      "step": 3175
+    },
+    {
+      "epoch": 0.5687877710629221,
+      "grad_norm": 0.700985312461853,
+      "learning_rate": 9.17927382753404e-06,
+      "loss": 0.02533963918685913,
+      "step": 3200
+    },
+    {
+      "epoch": 0.5732314255243512,
+      "grad_norm": 0.8443852663040161,
+      "learning_rate": 9.084720121028746e-06,
+      "loss": 0.029710006713867188,
+      "step": 3225
+    },
+    {
+      "epoch": 0.5776750799857803,
+      "grad_norm": 0.5237564444541931,
+      "learning_rate": 8.99016641452345e-06,
+      "loss": 0.02827130079269409,
+      "step": 3250
+    },
+    {
+      "epoch": 0.5821187344472094,
+      "grad_norm": 1.318710446357727,
+      "learning_rate": 8.895612708018155e-06,
+      "loss": 0.017649848461151123,
+      "step": 3275
+    },
+    {
+      "epoch": 0.5865623889086384,
+      "grad_norm": 2.1418726444244385,
+      "learning_rate": 8.80105900151286e-06,
+      "loss": 0.028039700984954834,
+      "step": 3300
+    },
+    {
+      "epoch": 0.5910060433700676,
+      "grad_norm": 0.6394239068031311,
+      "learning_rate": 8.706505295007565e-06,
+      "loss": 0.029724645614624023,
+      "step": 3325
+    },
+    {
+      "epoch": 0.5954496978314966,
+      "grad_norm": 0.44896772503852844,
+      "learning_rate": 8.61195158850227e-06,
+      "loss": 0.026093797683715822,
+      "step": 3350
+    },
+    {
+      "epoch": 0.5998933522929257,
+      "grad_norm": 2.3762757778167725,
+      "learning_rate": 8.517397881996974e-06,
+      "loss": 0.027712843418121337,
+      "step": 3375
+    },
+    {
+      "epoch": 0.6043370067543548,
+      "grad_norm": 1.4584051370620728,
+      "learning_rate": 8.42284417549168e-06,
+      "loss": 0.031196737289428712,
+      "step": 3400
+    },
+    {
+      "epoch": 0.6087806612157839,
+      "grad_norm": 2.2569475173950195,
+      "learning_rate": 8.328290468986385e-06,
+      "loss": 0.026621932983398437,
+      "step": 3425
+    },
+    {
+      "epoch": 0.6132243156772129,
+      "grad_norm": 1.9737194776535034,
+      "learning_rate": 8.23373676248109e-06,
+      "loss": 0.020163617134094237,
+      "step": 3450
+    },
+    {
+      "epoch": 0.617667970138642,
+      "grad_norm": 0.9083975553512573,
+      "learning_rate": 8.139183055975795e-06,
+      "loss": 0.0209149169921875,
+      "step": 3475
+    },
+    {
+      "epoch": 0.6221116246000711,
+      "grad_norm": 1.0563571453094482,
+      "learning_rate": 8.0446293494705e-06,
+      "loss": 0.025532805919647218,
+      "step": 3500
+    },
+    {
+      "epoch": 0.6221116246000711,
+      "eval_accuracy": 0.8925,
+      "eval_auroc": 0.9965268749674989,
+      "eval_f1": 0.9033707865168539,
+      "eval_loss": 0.029821457341313362,
+      "eval_runtime": 40.7571,
+      "eval_samples_per_second": 49.071,
+      "eval_steps_per_second": 1.546,
+      "eval_tpr_at_fpr1": 0.9254473161033797,
+      "eval_tpr_at_fpr5": 0.9850894632206759,
+      "step": 3500
+    },
+    {
+      "epoch": 0.6265552790615002,
+      "grad_norm": 1.6384830474853516,
+      "learning_rate": 7.950075642965204e-06,
+      "loss": 0.018657710552215576,
+      "step": 3525
+    },
+    {
+      "epoch": 0.6309989335229292,
+      "grad_norm": 1.4129881858825684,
+      "learning_rate": 7.85552193645991e-06,
+      "loss": 0.027512576580047608,
+      "step": 3550
+    },
+    {
+      "epoch": 0.6354425879843584,
+      "grad_norm": 1.2471665143966675,
+      "learning_rate": 7.760968229954615e-06,
+      "loss": 0.029382569789886473,
+      "step": 3575
+    },
+    {
+      "epoch": 0.6398862424457874,
+      "grad_norm": 1.1254513263702393,
+      "learning_rate": 7.66641452344932e-06,
+      "loss": 0.023775274753570556,
+      "step": 3600
+    },
+    {
+      "epoch": 0.6443298969072165,
+      "grad_norm": 0.9185925126075745,
+      "learning_rate": 7.571860816944025e-06,
+      "loss": 0.025625219345092775,
+      "step": 3625
+    },
+    {
+      "epoch": 0.6487735513686456,
+      "grad_norm": 0.9741719961166382,
+      "learning_rate": 7.477307110438729e-06,
+      "loss": 0.014400173425674439,
+      "step": 3650
+    },
+    {
+      "epoch": 0.6532172058300747,
+      "grad_norm": 1.5722410678863525,
+      "learning_rate": 7.382753403933435e-06,
+      "loss": 0.016961036920547484,
+      "step": 3675
+    },
+    {
+      "epoch": 0.6576608602915037,
+      "grad_norm": 1.0956284999847412,
+      "learning_rate": 7.28819969742814e-06,
+      "loss": 0.029399728775024413,
+      "step": 3700
+    },
+    {
+      "epoch": 0.6621045147529329,
+      "grad_norm": 1.8072013854980469,
+      "learning_rate": 7.193645990922845e-06,
+      "loss": 0.02603915214538574,
+      "step": 3725
+    },
+    {
+      "epoch": 0.6665481692143619,
+      "grad_norm": 1.4998871088027954,
+      "learning_rate": 7.09909228441755e-06,
+      "loss": 0.018154734373092653,
+      "step": 3750
+    },
+    {
+      "epoch": 0.670991823675791,
+      "grad_norm": 1.015345573425293,
+      "learning_rate": 7.004538577912255e-06,
+      "loss": 0.020612461566925047,
+      "step": 3775
+    },
+    {
+      "epoch": 0.67543547813722,
+      "grad_norm": 0.518636167049408,
+      "learning_rate": 6.909984871406959e-06,
+      "loss": 0.020666675567626955,
+      "step": 3800
+    },
+    {
+      "epoch": 0.6798791325986492,
+      "grad_norm": 1.4760479927062988,
+      "learning_rate": 6.815431164901665e-06,
+      "loss": 0.022126734256744385,
+      "step": 3825
+    },
+    {
+      "epoch": 0.6843227870600782,
+      "grad_norm": 0.5096405744552612,
+      "learning_rate": 6.7208774583963696e-06,
+      "loss": 0.023763720989227296,
+      "step": 3850
+    },
+    {
+      "epoch": 0.6887664415215072,
+      "grad_norm": 0.7516443133354187,
+      "learning_rate": 6.626323751891075e-06,
+      "loss": 0.023717043399810792,
+      "step": 3875
+    },
+    {
+      "epoch": 0.6932100959829364,
+      "grad_norm": 0.8385019898414612,
+      "learning_rate": 6.53177004538578e-06,
+      "loss": 0.021878042221069337,
+      "step": 3900
+    },
+    {
+      "epoch": 0.6976537504443654,
+      "grad_norm": 1.1693350076675415,
+      "learning_rate": 6.4372163388804845e-06,
+      "loss": 0.014371514320373535,
+      "step": 3925
+    },
+    {
+      "epoch": 0.7020974049057945,
+      "grad_norm": 1.4496546983718872,
+      "learning_rate": 6.342662632375189e-06,
+      "loss": 0.021327991485595704,
+      "step": 3950
+    },
+    {
+      "epoch": 0.7065410593672236,
+      "grad_norm": 1.0142734050750732,
+      "learning_rate": 6.248108925869895e-06,
+      "loss": 0.023486480712890626,
+      "step": 3975
+    },
+    {
+      "epoch": 0.7109847138286527,
+      "grad_norm": 0.4203350245952606,
+      "learning_rate": 6.1535552193645995e-06,
+      "loss": 0.023264715671539305,
+      "step": 4000
+    },
+    {
+      "epoch": 0.7109847138286527,
+      "eval_accuracy": 0.9335,
+      "eval_auroc": 0.9970688944802013,
+      "eval_f1": 0.9379374708352777,
+      "eval_loss": 0.02452407218515873,
+      "eval_runtime": 40.1446,
+      "eval_samples_per_second": 49.82,
+      "eval_steps_per_second": 1.569,
+      "eval_tpr_at_fpr1": 0.9264413518886679,
+      "eval_tpr_at_fpr5": 0.9850894632206759,
+      "step": 4000
+    },
+    {
+      "epoch": 0.7154283682900817,
+      "grad_norm": 0.6930143237113953,
+      "learning_rate": 6.059001512859305e-06,
+      "loss": 0.01856299042701721,
+      "step": 4025
+    },
+    {
+      "epoch": 0.7198720227515109,
+      "grad_norm": 1.0674962997436523,
+      "learning_rate": 5.96444780635401e-06,
+      "loss": 0.023626606464385986,
+      "step": 4050
+    },
+    {
+      "epoch": 0.7243156772129399,
+      "grad_norm": 0.6356366276741028,
+      "learning_rate": 5.8698940998487145e-06,
+      "loss": 0.023326983451843263,
+      "step": 4075
+    },
+    {
+      "epoch": 0.728759331674369,
+      "grad_norm": 0.8227376937866211,
+      "learning_rate": 5.775340393343419e-06,
+      "loss": 0.02331566095352173,
+      "step": 4100
+    },
+    {
+      "epoch": 0.733202986135798,
+      "grad_norm": 2.189657211303711,
+      "learning_rate": 5.680786686838125e-06,
+      "loss": 0.01987994074821472,
+      "step": 4125
+    },
+    {
+      "epoch": 0.7376466405972272,
+      "grad_norm": 0.46455055475234985,
+      "learning_rate": 5.5862329803328295e-06,
+      "loss": 0.01780161142349243,
+      "step": 4150
+    },
+    {
+      "epoch": 0.7420902950586562,
+      "grad_norm": 0.7525627017021179,
+      "learning_rate": 5.491679273827535e-06,
+      "loss": 0.02872683048248291,
+      "step": 4175
+    },
+    {
+      "epoch": 0.7465339495200853,
+      "grad_norm": 0.9939025640487671,
+      "learning_rate": 5.39712556732224e-06,
+      "loss": 0.021651785373687744,
+      "step": 4200
+    },
+    {
+      "epoch": 0.7509776039815144,
+      "grad_norm": 0.5748035907745361,
+      "learning_rate": 5.3025718608169445e-06,
+      "loss": 0.01857919096946716,
+      "step": 4225
+    },
+    {
+      "epoch": 0.7554212584429435,
+      "grad_norm": 1.1377756595611572,
+      "learning_rate": 5.208018154311649e-06,
+      "loss": 0.021290059089660644,
+      "step": 4250
+    },
+    {
+      "epoch": 0.7598649129043725,
+      "grad_norm": 1.592410683631897,
+      "learning_rate": 5.113464447806355e-06,
+      "loss": 0.01949896812438965,
+      "step": 4275
+    },
+    {
+      "epoch": 0.7643085673658017,
+      "grad_norm": 1.3217352628707886,
+      "learning_rate": 5.0189107413010595e-06,
+      "loss": 0.024791300296783447,
+      "step": 4300
+    },
+    {
+      "epoch": 0.7687522218272307,
+      "grad_norm": 0.3922988474369049,
+      "learning_rate": 4.924357034795764e-06,
+      "loss": 0.021808433532714843,
+      "step": 4325
+    },
+    {
+      "epoch": 0.7731958762886598,
+      "grad_norm": 0.45381656289100647,
+      "learning_rate": 4.82980332829047e-06,
+      "loss": 0.020455398559570313,
+      "step": 4350
+    },
+    {
+      "epoch": 0.7776395307500888,
+      "grad_norm": 0.8540909886360168,
+      "learning_rate": 4.7352496217851745e-06,
+      "loss": 0.023225700855255126,
+      "step": 4375
+    },
+    {
+      "epoch": 0.782083185211518,
+      "grad_norm": 2.9069783687591553,
+      "learning_rate": 4.640695915279879e-06,
+      "loss": 0.023292510509490966,
+      "step": 4400
+    },
+    {
+      "epoch": 0.786526839672947,
+      "grad_norm": 0.9787670969963074,
+      "learning_rate": 4.546142208774585e-06,
+      "loss": 0.024890389442443848,
+      "step": 4425
+    },
+    {
+      "epoch": 0.7909704941343761,
+      "grad_norm": 1.0303661823272705,
+      "learning_rate": 4.4515885022692894e-06,
+      "loss": 0.023072149753570557,
+      "step": 4450
+    },
+    {
+      "epoch": 0.7954141485958052,
+      "grad_norm": 2.1931862831115723,
+      "learning_rate": 4.357034795763994e-06,
+      "loss": 0.032431015968322756,
+      "step": 4475
+    },
+    {
+      "epoch": 0.7998578030572343,
+      "grad_norm": 0.7739485502243042,
+      "learning_rate": 4.2624810892587e-06,
+      "loss": 0.020095715522766112,
+      "step": 4500
+    },
+    {
+      "epoch": 0.7998578030572343,
+      "eval_accuracy": 0.9275,
+      "eval_auroc": 0.9970798948762156,
+      "eval_f1": 0.9327146171693736,
+      "eval_loss": 0.024173183366656303,
+      "eval_runtime": 39.1522,
+      "eval_samples_per_second": 51.083,
+      "eval_steps_per_second": 1.609,
+      "eval_tpr_at_fpr1": 0.9224652087475149,
+      "eval_tpr_at_fpr5": 0.9900596421471173,
+      "step": 4500
+    },
+    {
+      "epoch": 0.8043014575186633,
+      "grad_norm": 1.5598735809326172,
+      "learning_rate": 4.167927382753404e-06,
+      "loss": 0.024987099170684816,
+      "step": 4525
+    },
+    {
+      "epoch": 0.8087451119800925,
+      "grad_norm": 1.1426900625228882,
+      "learning_rate": 4.073373676248109e-06,
+      "loss": 0.01934351325035095,
+      "step": 4550
+    },
+    {
+      "epoch": 0.8131887664415215,
+      "grad_norm": 0.3795163333415985,
+      "learning_rate": 3.978819969742814e-06,
+      "loss": 0.020940425395965575,
+      "step": 4575
+    },
+    {
+      "epoch": 0.8176324209029506,
+      "grad_norm": 1.1596218347549438,
+      "learning_rate": 3.884266263237519e-06,
+      "loss": 0.027658913135528564,
+      "step": 4600
+    },
+    {
+      "epoch": 0.8220760753643797,
+      "grad_norm": 1.05118989944458,
+      "learning_rate": 3.789712556732224e-06,
+      "loss": 0.016726157665252685,
+      "step": 4625
+    },
+    {
+      "epoch": 0.8265197298258088,
+      "grad_norm": 0.994926393032074,
+      "learning_rate": 3.6951588502269293e-06,
+      "loss": 0.013396300077438354,
+      "step": 4650
+    },
+    {
+      "epoch": 0.8309633842872378,
+      "grad_norm": 2.416964054107666,
+      "learning_rate": 3.6006051437216344e-06,
+      "loss": 0.02318723201751709,
+      "step": 4675
+    },
+    {
+      "epoch": 0.835407038748667,
+      "grad_norm": 2.359633445739746,
+      "learning_rate": 3.506051437216339e-06,
+      "loss": 0.02345597982406616,
+      "step": 4700
+    },
+    {
+      "epoch": 0.839850693210096,
+      "grad_norm": 1.0586191415786743,
+      "learning_rate": 3.4114977307110442e-06,
+      "loss": 0.021095492839813233,
+      "step": 4725
+    },
+    {
+      "epoch": 0.8442943476715251,
+      "grad_norm": 1.2005938291549683,
+      "learning_rate": 3.3169440242057494e-06,
+      "loss": 0.023975539207458495,
+      "step": 4750
+    },
+    {
+      "epoch": 0.8487380021329541,
+      "grad_norm": 0.22911959886550903,
+      "learning_rate": 3.222390317700454e-06,
+      "loss": 0.019334245920181275,
+      "step": 4775
+    },
+    {
+      "epoch": 0.8531816565943833,
+      "grad_norm": 1.3965319395065308,
+      "learning_rate": 3.1278366111951592e-06,
+      "loss": 0.027639262676239014,
+      "step": 4800
+    },
+    {
+      "epoch": 0.8576253110558123,
+      "grad_norm": 0.15118920803070068,
+      "learning_rate": 3.0332829046898644e-06,
+      "loss": 0.020000927448272705,
+      "step": 4825
+    },
+    {
+      "epoch": 0.8620689655172413,
+      "grad_norm": 1.7333295345306396,
+      "learning_rate": 2.938729198184569e-06,
+      "loss": 0.02153873920440674,
+      "step": 4850
+    },
+    {
+      "epoch": 0.8665126199786705,
+      "grad_norm": 0.2823106348514557,
+      "learning_rate": 2.844175491679274e-06,
+      "loss": 0.014718363285064697,
+      "step": 4875
+    },
+    {
+      "epoch": 0.8709562744400995,
+      "grad_norm": 0.735140323638916,
+      "learning_rate": 2.7496217851739793e-06,
+      "loss": 0.01869586229324341,
+      "step": 4900
+    },
+    {
+      "epoch": 0.8753999289015286,
+      "grad_norm": 0.4756013751029968,
+      "learning_rate": 2.655068078668684e-06,
+      "loss": 0.01766459345817566,
+      "step": 4925
+    },
+    {
+      "epoch": 0.8798435833629576,
+      "grad_norm": 1.9793126583099365,
+      "learning_rate": 2.560514372163389e-06,
+      "loss": 0.01844774007797241,
+      "step": 4950
+    },
+    {
+      "epoch": 0.8842872378243868,
+      "grad_norm": 0.3258880376815796,
+      "learning_rate": 2.465960665658094e-06,
+      "loss": 0.01422677755355835,
+      "step": 4975
+    },
+    {
+      "epoch": 0.8887308922858158,
+      "grad_norm": 0.9487712383270264,
+      "learning_rate": 2.371406959152799e-06,
+      "loss": 0.016807562112808226,
+      "step": 5000
+    },
+    {
+      "epoch": 0.8887308922858158,
+      "eval_accuracy": 0.9295,
+      "eval_auroc": 0.9969978919241093,
+      "eval_f1": 0.9344490934449095,
+      "eval_loss": 0.024602515622973442,
+      "eval_runtime": 39.8771,
+      "eval_samples_per_second": 50.154,
+      "eval_steps_per_second": 1.58,
+      "eval_tpr_at_fpr1": 0.889662027833002,
+      "eval_tpr_at_fpr5": 0.9910536779324056,
+      "step": 5000
+    },
+    {
+      "epoch": 0.893174546747245,
+      "grad_norm": 1.8331549167633057,
+      "learning_rate": 2.276853252647504e-06,
+      "loss": 0.019318313598632814,
+      "step": 5025
+    },
+    {
+      "epoch": 0.897618201208674,
+      "grad_norm": 0.5335781574249268,
+      "learning_rate": 2.182299546142209e-06,
+      "loss": 0.016727542877197264,
+      "step": 5050
+    },
+    {
+      "epoch": 0.9020618556701031,
+      "grad_norm": 2.0710813999176025,
+      "learning_rate": 2.087745839636914e-06,
+      "loss": 0.024016971588134765,
+      "step": 5075
+    },
+    {
+      "epoch": 0.9065055101315321,
+      "grad_norm": 0.5005258321762085,
+      "learning_rate": 1.993192133131619e-06,
+      "loss": 0.023308300971984865,
+      "step": 5100
+    },
+    {
+      "epoch": 0.9109491645929613,
+      "grad_norm": 0.8444198369979858,
+      "learning_rate": 1.8986384266263239e-06,
+      "loss": 0.013868091106414794,
+      "step": 5125
+    },
+    {
+      "epoch": 0.9153928190543903,
+      "grad_norm": 1.0288333892822266,
+      "learning_rate": 1.8040847201210288e-06,
+      "loss": 0.02429831266403198,
+      "step": 5150
+    },
+    {
+      "epoch": 0.9198364735158194,
+      "grad_norm": 0.5995722413063049,
+      "learning_rate": 1.709531013615734e-06,
+      "loss": 0.020702524185180662,
+      "step": 5175
+    },
+    {
+      "epoch": 0.9242801279772485,
+      "grad_norm": 1.5560880899429321,
+      "learning_rate": 1.6149773071104389e-06,
+      "loss": 0.014829163551330566,
+      "step": 5200
+    },
+    {
+      "epoch": 0.9287237824386776,
+      "grad_norm": 1.271360993385315,
+      "learning_rate": 1.5204236006051438e-06,
+      "loss": 0.020302300453186036,
+      "step": 5225
+    },
+    {
+      "epoch": 0.9331674369001066,
+      "grad_norm": 2.036619186401367,
+      "learning_rate": 1.425869894099849e-06,
+      "loss": 0.023549365997314452,
+      "step": 5250
+    },
+    {
+      "epoch": 0.9376110913615358,
+      "grad_norm": 1.7285027503967285,
+      "learning_rate": 1.3313161875945538e-06,
+      "loss": 0.02568220853805542,
+      "step": 5275
+    },
+    {
+      "epoch": 0.9420547458229648,
+      "grad_norm": 1.4646673202514648,
+      "learning_rate": 1.2367624810892588e-06,
+      "loss": 0.030799252986907957,
+      "step": 5300
+    },
+    {
+      "epoch": 0.9464984002843939,
+      "grad_norm": 0.6867812871932983,
+      "learning_rate": 1.142208774583964e-06,
+      "loss": 0.018717833757400514,
+      "step": 5325
+    },
+    {
+      "epoch": 0.9509420547458229,
+      "grad_norm": 0.2627001702785492,
+      "learning_rate": 1.0476550680786688e-06,
+      "loss": 0.01653684616088867,
+      "step": 5350
+    },
+    {
+      "epoch": 0.9553857092072521,
+      "grad_norm": 0.30518868565559387,
+      "learning_rate": 9.531013615733737e-07,
+      "loss": 0.022223813533782957,
+      "step": 5375
+    },
+    {
+      "epoch": 0.9598293636686811,
+      "grad_norm": 0.5545350909233093,
+      "learning_rate": 8.585476550680788e-07,
+      "loss": 0.018215081691741943,
+      "step": 5400
+    },
+    {
+      "epoch": 0.9642730181301102,
+      "grad_norm": 0.304283082485199,
+      "learning_rate": 7.639939485627837e-07,
+      "loss": 0.019889332056045532,
+      "step": 5425
+    },
+    {
+      "epoch": 0.9687166725915393,
+      "grad_norm": 1.052090048789978,
+      "learning_rate": 6.694402420574887e-07,
+      "loss": 0.017396693229675294,
+      "step": 5450
+    },
+    {
+      "epoch": 0.9731603270529684,
+      "grad_norm": 0.1891939640045166,
+      "learning_rate": 5.748865355521937e-07,
+      "loss": 0.019253385066986085,
+      "step": 5475
+    },
+    {
+      "epoch": 0.9776039815143974,
+      "grad_norm": 0.5522451996803284,
+      "learning_rate": 4.803328290468987e-07,
+      "loss": 0.017252475023269653,
+      "step": 5500
+    },
+    {
+      "epoch": 0.9776039815143974,
+      "eval_accuracy": 0.93,
+      "eval_auroc": 0.9973359040925472,
+      "eval_f1": 0.9349442379182157,
+      "eval_loss": 0.024247920140624046,
+      "eval_runtime": 38.3653,
+      "eval_samples_per_second": 52.13,
+      "eval_steps_per_second": 1.642,
+      "eval_tpr_at_fpr1": 0.9055666003976143,
+      "eval_tpr_at_fpr5": 0.9880715705765407,
+      "step": 5500
+    },
+    {
+      "epoch": 0.9820476359758266,
+      "grad_norm": 1.228925347328186,
+      "learning_rate": 3.8577912254160366e-07,
+      "loss": 0.022175378799438476,
+      "step": 5525
+    },
+    {
+      "epoch": 0.9864912904372556,
+      "grad_norm": 0.3541058599948883,
+      "learning_rate": 2.9122541603630864e-07,
+      "loss": 0.019693093299865724,
+      "step": 5550
+    },
+    {
+      "epoch": 0.9909349448986847,
+      "grad_norm": 0.0981239303946495,
+      "learning_rate": 1.9667170953101364e-07,
+      "loss": 0.018846184015274048,
+      "step": 5575
+    },
+    {
+      "epoch": 0.9953785993601137,
+      "grad_norm": 0.9296360611915588,
+      "learning_rate": 1.021180030257186e-07,
+      "loss": 0.015072580575942993,
+      "step": 5600
+    },
+    {
+      "epoch": 0.9998222538215429,
+      "grad_norm": 1.3698371648788452,
+      "learning_rate": 7.564296520423602e-09,
+      "loss": 0.022552621364593507,
+      "step": 5625
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.9275,
+      "eval_auroc": 0.9973979063246277,
+      "eval_f1": 0.9327770050996754,
+      "eval_loss": 0.02445497363805771,
+      "eval_runtime": 39.1622,
+      "eval_samples_per_second": 51.07,
+      "eval_steps_per_second": 1.609,
+      "eval_tpr_at_fpr1": 0.911530815109344,
+      "eval_tpr_at_fpr5": 0.989065606361829,
+      "step": 5626
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 5626,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-5626/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b726f050f5029e1ef25800ffb43c1e5bcf5df8fde670427401e6bad8b3522c9c
+size 5329

meta.json CHANGED Viewed

@@ -36,6 +36,6 @@
     "wiki": 13
   },
   "num_domains": 14,
-  "n_train": 90001,
-  "n_val": 9999
 }

     "wiki": 13
   },
   "num_domains": 14,
+  "n_train": 180001,
+  "n_val": 19999
 }

onnx/detector_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "base_encoder": "microsoft/deberta-v3-base",
+  "feature_dim": 17,
+  "feat_hidden": 64,
+  "num_domains": 14,
+  "domain_loss_weight": 0.2,
+  "dropout": 0.1,
+  "focal_gamma": 2.0,
+  "focal_alpha": 0.85
+}

onnx/meta.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "feature_names": [
+    "flesch_reading_ease",
+    "flesch_kincaid_grade",
+    "gunning_fog",
+    "type_token_ratio",
+    "hapax_rate",
+    "avg_word_len",
+    "unique_word_frac",
+    "sentence_count_log",
+    "mean_sentence_len",
+    "sentence_len_std",
+    "sentence_len_cv",
+    "commas_per_sentence",
+    "punct_ratio",
+    "stopword_ratio",
+    "digit_ratio",
+    "upper_ratio",
+    "word_count_log"
+  ],
+  "feature_dim": 17,
+  "domain2id": {
+    "finance": 0,
+    "medicine": 1,
+    "mixed": 2,
+    "open_qa": 3,
+    "raid_abstracts": 4,
+    "raid_books": 5,
+    "raid_news": 6,
+    "raid_poetry": 7,
+    "raid_recipes": 8,
+    "raid_reddit": 9,
+    "raid_reviews": 10,
+    "raid_wiki": 11,
+    "reddit": 12,
+    "wiki": 13
+  },
+  "num_domains": 14,
+  "n_train": 180001,
+  "n_val": 19999
+}

onnx/model_fp16.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c7276779c6e36da49d4874434d39df0ca5e3e01de2a5444759d801c1c9978fe
 size 369348178

 version https://git-lfs.github.com/spec/v1
+oid sha256:af06fe93c34ea853ea573d1daa93683dfcb4b7ef80299f85a6b15ea23c64fcb0
 size 369348178

onnx/model_fp32.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae0789cd8acb2598fa3587d91eda4fc9a61475af094e8c9c35b1977b43027feb
 size 737710212

 version https://git-lfs.github.com/spec/v1
+oid sha256:5562b7713735b9141b8005c39460e866a5462c9bf409993dc54d70bab6143c45
 size 737710212

onnx/model_int8.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f358c0c1cb714ee66743f5ee14779a347d559236fc1cbe7e831f02730f50ce2
 size 243819134

 version https://git-lfs.github.com/spec/v1
+oid sha256:6dc3275bc3c4e0d89bc3cd1e9aacca98ee054e39e1e5b6099c7058ec06e3b1d6
 size 243819134

onnx/model_q4.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfbba2b2ba869c15af3c661bcd72cc1109eaf3e899e7737788afe5373e9eb246
 size 177709957

 version https://git-lfs.github.com/spec/v1
+oid sha256:c03511115935e5d337be758bfb127e316cf3e214c31f40eb3decd6dff424c58f
 size 177709957

onnx/raid_results.json ADDED Viewed

	@@ -0,0 +1,153 @@

+{
+  "overall": {
+    "n": 20000,
+    "accuracy": 0.98195,
+    "tpr_at_fpr5": 0.9146611698015975,
+    "tpr_at_fpr1": 0.7988662715794899,
+    "auroc": 0.9821251128640067
+  },
+  "by_attack": {
+    "alternative_spelling": {
+      "n": 2225,
+      "accuracy": 0.8728089887640449,
+      "tpr_at_fpr5": 0.9177914110429448,
+      "tpr_at_fpr1": 0.8165644171779141,
+      "auroc": 0.9824694540392843
+    },
+    "article_deletion": {
+      "n": 2231,
+      "accuracy": 0.8731510533393098,
+      "tpr_at_fpr5": 0.8905867970660146,
+      "tpr_at_fpr1": 0.7854523227383863,
+      "auroc": 0.9769421215919131
+    },
+    "homoglyph": {
+      "n": 2170,
+      "accuracy": 0.871889400921659,
+      "tpr_at_fpr5": 0.9326984126984127,
+      "tpr_at_fpr1": 0.8488888888888889,
+      "auroc": 0.9868662131519276
+    },
+    "insert_paragraphs": {
+      "n": 2215,
+      "accuracy": 0.8753950338600451,
+      "tpr_at_fpr5": 0.9302469135802469,
+      "tpr_at_fpr1": 0.8351851851851851,
+      "auroc": 0.9850887021475256
+    },
+    "none": {
+      "n": 2149,
+      "accuracy": 0.8711028385295486,
+      "tpr_at_fpr5": 0.9298584298584298,
+      "tpr_at_fpr1": 0.8532818532818532,
+      "auroc": 0.9860971415593265
+    },
+    "number": {
+      "n": 2211,
+      "accuracy": 0.8756218905472637,
+      "tpr_at_fpr5": 0.9282178217821783,
+      "tpr_at_fpr1": 0.8168316831683168,
+      "auroc": 0.9850663532739828
+    },
+    "paraphrase": {
+      "n": 2259,
+      "accuracy": 0.8764940239043825,
+      "tpr_at_fpr5": 0.8641826923076923,
+      "tpr_at_fpr1": 0.6219951923076923,
+      "auroc": 0.9738819085326438
+    },
+    "perplexity_misspelling": {
+      "n": 2259,
+      "accuracy": 0.8747233289065959,
+      "tpr_at_fpr5": 0.8990384615384616,
+      "tpr_at_fpr1": 0.7884615384615384,
+      "auroc": 0.9792622818358111
+    },
+    "synonym": {
+      "n": 2211,
+      "accuracy": 0.8715513342379014,
+      "tpr_at_fpr5": 0.9102722772277227,
+      "tpr_at_fpr1": 0.7623762376237624,
+      "auroc": 0.9792006406523005
+    },
+    "upper_lower": {
+      "n": 2197,
+      "accuracy": 0.8725534820209376,
+      "tpr_at_fpr5": 0.9257178526841449,
+      "tpr_at_fpr1": 0.8033707865168539,
+      "auroc": 0.9831041030644467
+    },
+    "whitespace": {
+      "n": 2208,
+      "accuracy": 0.8745471014492754,
+      "tpr_at_fpr5": 0.921264724116553,
+      "tpr_at_fpr1": 0.8239305641661501,
+      "auroc": 0.9838674217362083
+    },
+    "zero_width_space": {
+      "n": 2210,
+      "accuracy": 0.8742081447963801,
+      "tpr_at_fpr5": 0.9294117647058824,
+      "tpr_at_fpr1": 0.8390092879256966,
+      "auroc": 0.9843244790176133
+    }
+  },
+  "by_domain": {
+    "abstracts": {
+      "n": 3178,
+      "accuracy": 0.9128382630585273,
+      "tpr_at_fpr5": 0.9043747580332946,
+      "tpr_at_fpr1": 0.7557104142469996,
+      "auroc": 0.9824040185179763
+    },
+    "books": {
+      "n": 3235,
+      "accuracy": 0.9153013910355486,
+      "tpr_at_fpr5": 0.9704545454545455,
+      "tpr_at_fpr1": 0.9053030303030303,
+      "auroc": 0.9939209320091673
+    },
+    "news": {
+      "n": 3226,
+      "accuracy": 0.912275263484191,
+      "tpr_at_fpr5": 0.8749524895477004,
+      "tpr_at_fpr1": 0.7229190421892816,
+      "auroc": 0.9752428223284753
+    },
+    "poetry": {
+      "n": 3136,
+      "accuracy": 0.9075255102040817,
+      "tpr_at_fpr5": 0.885478158205431,
+      "tpr_at_fpr1": 0.7276662731208186,
+      "auroc": 0.9738301932343185
+    },
+    "recipes": {
+      "n": 3148,
+      "accuracy": 0.9113722998729352,
+      "tpr_at_fpr5": 0.9498629063846455,
+      "tpr_at_fpr1": 0.8527222875048962,
+      "auroc": 0.989551919475193
+    },
+    "reddit": {
+      "n": 3179,
+      "accuracy": 0.9053161371500472,
+      "tpr_at_fpr5": 0.9036377708978328,
+      "tpr_at_fpr1": 0.81656346749226,
+      "auroc": 0.9770247417852589
+    },
+    "reviews": {
+      "n": 1933,
+      "accuracy": 0.8572167615106053,
+      "tpr_at_fpr5": 0.9200298953662183,
+      "tpr_at_fpr1": 0.8243647234678625,
+      "auroc": 0.9841969074625365
+    },
+    "wiki": {
+      "n": 3130,
+      "accuracy": 0.9089456869009584,
+      "tpr_at_fpr5": 0.9104536489151874,
+      "tpr_at_fpr1": 0.7964497041420119,
+      "auroc": 0.9816398985629755
+    }
+  }
+}

onnx/raid_submission.json ADDED Viewed

The diff for this file is too large to render. See raw diff

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6565545394e9e633e77dc60ff996c310401159ebef576b4805aec322823a6c0c
+size 736857075

raid_results.json ADDED Viewed

	@@ -0,0 +1,153 @@

+{
+  "overall": {
+    "n": 20000,
+    "accuracy": 0.98195,
+    "tpr_at_fpr5": 0.9146611698015975,
+    "tpr_at_fpr1": 0.7988662715794899,
+    "auroc": 0.9821251128640067
+  },
+  "by_attack": {
+    "alternative_spelling": {
+      "n": 2225,
+      "accuracy": 0.8728089887640449,
+      "tpr_at_fpr5": 0.9177914110429448,
+      "tpr_at_fpr1": 0.8165644171779141,
+      "auroc": 0.9824694540392843
+    },
+    "article_deletion": {
+      "n": 2231,
+      "accuracy": 0.8731510533393098,
+      "tpr_at_fpr5": 0.8905867970660146,
+      "tpr_at_fpr1": 0.7854523227383863,
+      "auroc": 0.9769421215919131
+    },
+    "homoglyph": {
+      "n": 2170,
+      "accuracy": 0.871889400921659,
+      "tpr_at_fpr5": 0.9326984126984127,
+      "tpr_at_fpr1": 0.8488888888888889,
+      "auroc": 0.9868662131519276
+    },
+    "insert_paragraphs": {
+      "n": 2215,
+      "accuracy": 0.8753950338600451,
+      "tpr_at_fpr5": 0.9302469135802469,
+      "tpr_at_fpr1": 0.8351851851851851,
+      "auroc": 0.9850887021475256
+    },
+    "none": {
+      "n": 2149,
+      "accuracy": 0.8711028385295486,
+      "tpr_at_fpr5": 0.9298584298584298,
+      "tpr_at_fpr1": 0.8532818532818532,
+      "auroc": 0.9860971415593265
+    },
+    "number": {
+      "n": 2211,
+      "accuracy": 0.8756218905472637,
+      "tpr_at_fpr5": 0.9282178217821783,
+      "tpr_at_fpr1": 0.8168316831683168,
+      "auroc": 0.9850663532739828
+    },
+    "paraphrase": {
+      "n": 2259,
+      "accuracy": 0.8764940239043825,
+      "tpr_at_fpr5": 0.8641826923076923,
+      "tpr_at_fpr1": 0.6219951923076923,
+      "auroc": 0.9738819085326438
+    },
+    "perplexity_misspelling": {
+      "n": 2259,
+      "accuracy": 0.8747233289065959,
+      "tpr_at_fpr5": 0.8990384615384616,
+      "tpr_at_fpr1": 0.7884615384615384,
+      "auroc": 0.9792622818358111
+    },
+    "synonym": {
+      "n": 2211,
+      "accuracy": 0.8715513342379014,
+      "tpr_at_fpr5": 0.9102722772277227,
+      "tpr_at_fpr1": 0.7623762376237624,
+      "auroc": 0.9792006406523005
+    },
+    "upper_lower": {
+      "n": 2197,
+      "accuracy": 0.8725534820209376,
+      "tpr_at_fpr5": 0.9257178526841449,
+      "tpr_at_fpr1": 0.8033707865168539,
+      "auroc": 0.9831041030644467
+    },
+    "whitespace": {
+      "n": 2208,
+      "accuracy": 0.8745471014492754,
+      "tpr_at_fpr5": 0.921264724116553,
+      "tpr_at_fpr1": 0.8239305641661501,
+      "auroc": 0.9838674217362083
+    },
+    "zero_width_space": {
+      "n": 2210,
+      "accuracy": 0.8742081447963801,
+      "tpr_at_fpr5": 0.9294117647058824,
+      "tpr_at_fpr1": 0.8390092879256966,
+      "auroc": 0.9843244790176133
+    }
+  },
+  "by_domain": {
+    "abstracts": {
+      "n": 3178,
+      "accuracy": 0.9128382630585273,
+      "tpr_at_fpr5": 0.9043747580332946,
+      "tpr_at_fpr1": 0.7557104142469996,
+      "auroc": 0.9824040185179763
+    },
+    "books": {
+      "n": 3235,
+      "accuracy": 0.9153013910355486,
+      "tpr_at_fpr5": 0.9704545454545455,
+      "tpr_at_fpr1": 0.9053030303030303,
+      "auroc": 0.9939209320091673
+    },
+    "news": {
+      "n": 3226,
+      "accuracy": 0.912275263484191,
+      "tpr_at_fpr5": 0.8749524895477004,
+      "tpr_at_fpr1": 0.7229190421892816,
+      "auroc": 0.9752428223284753
+    },
+    "poetry": {
+      "n": 3136,
+      "accuracy": 0.9075255102040817,
+      "tpr_at_fpr5": 0.885478158205431,
+      "tpr_at_fpr1": 0.7276662731208186,
+      "auroc": 0.9738301932343185
+    },
+    "recipes": {
+      "n": 3148,
+      "accuracy": 0.9113722998729352,
+      "tpr_at_fpr5": 0.9498629063846455,
+      "tpr_at_fpr1": 0.8527222875048962,
+      "auroc": 0.989551919475193
+    },
+    "reddit": {
+      "n": 3179,
+      "accuracy": 0.9053161371500472,
+      "tpr_at_fpr5": 0.9036377708978328,
+      "tpr_at_fpr1": 0.81656346749226,
+      "auroc": 0.9770247417852589
+    },
+    "reviews": {
+      "n": 1933,
+      "accuracy": 0.8572167615106053,
+      "tpr_at_fpr5": 0.9200298953662183,
+      "tpr_at_fpr1": 0.8243647234678625,
+      "auroc": 0.9841969074625365
+    },
+    "wiki": {
+      "n": 3130,
+      "accuracy": 0.9089456869009584,
+      "tpr_at_fpr5": 0.9104536489151874,
+      "tpr_at_fpr1": 0.7964497041420119,
+      "auroc": 0.9816398985629755
+    }
+  }
+}