Upload 11 files

Browse files

Files changed (11) hide show

config.json +71 -0
model.safetensors +3 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +55 -0
trainer_state.json +1586 -0
training_args.bin +3 -0
vocab.txt +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,71 @@

+{
+  "_name_or_path": "../../bert",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "Agricultural and Biological Sciences",
+    "1": "Biochemistry, Genetics and Molecular Biology",
+    "2": "Chemical Engineering",
+    "3": "Chemistry",
+    "4": "Computer Science",
+    "5": "Earth and Planetary",
+    "6": "Economics, Econometrics and Finance",
+    "7": "Engineering",
+    "8": "Food Science",
+    "9": "Immunology and Microbiology",
+    "10": "Materials Science",
+    "11": "Mathematics",
+    "12": "Medicine and Dentistry",
+    "13": "Neuroscience",
+    "14": "Nursing and Health Professions",
+    "15": "Pharmacology, Toxicology and Pharmaceutical Science",
+    "16": "Physics and Astronomy",
+    "17": "Psychology",
+    "18": "Social Sciences",
+    "19": "Veterinary Science and Veterinary Medicine"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "Agricultural and Biological Sciences": 0,
+    "Biochemistry, Genetics and Molecular Biology": 1,
+    "Chemical Engineering": 2,
+    "Chemistry": 3,
+    "Computer Science": 4,
+    "Earth and Planetary": 5,
+    "Economics, Econometrics and Finance": 6,
+    "Engineering": 7,
+    "Food Science": 8,
+    "Immunology and Microbiology": 9,
+    "Materials Science": 10,
+    "Mathematics": 11,
+    "Medicine and Dentistry": 12,
+    "Neuroscience": 13,
+    "Nursing and Health Professions": 14,
+    "Pharmacology, Toxicology and Pharmaceutical Science": 15,
+    "Physics and Astronomy": 16,
+    "Psychology": 17,
+    "Social Sciences": 18,
+    "Veterinary Science and Veterinary Medicine": 19
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "multi_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.42.3",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a4e5f7e9bad60fe639dedd44471ae1cf5d97f22ebad783fe753dbc72ed593132
+size 438014016

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8bbb1ce5faf8f2cb9776c5566d987764c02e434caf69a6cbf2fa200178735f92
+size 876149114

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:72351c5ca54abf9f46ad9d15a8a913849131486ed6f9ad78ca81a1c8aac71b82
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e27a7631631d7147f4c96c542ba700cdde3381d1d59738c37fc7222b83ff3ab9
+size 1064

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1586 @@

+{
+  "best_metric": 0.759579488098438,
+  "best_model_checkpoint": "bert-finetuned-sem_eval-english\\checkpoint-85976",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 107470,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.023262305759746905,
+      "grad_norm": 0.5290595293045044,
+      "learning_rate": 1.9906950776961013e-05,
+      "loss": 0.2421,
+      "step": 500
+    },
+    {
+      "epoch": 0.04652461151949381,
+      "grad_norm": 0.7135451436042786,
+      "learning_rate": 1.9813901553922027e-05,
+      "loss": 0.153,
+      "step": 1000
+    },
+    {
+      "epoch": 0.06978691727924072,
+      "grad_norm": 0.6057224869728088,
+      "learning_rate": 1.9720852330883038e-05,
+      "loss": 0.135,
+      "step": 1500
+    },
+    {
+      "epoch": 0.09304922303898762,
+      "grad_norm": 2.033938407897949,
+      "learning_rate": 1.962780310784405e-05,
+      "loss": 0.1273,
+      "step": 2000
+    },
+    {
+      "epoch": 0.11631152879873453,
+      "grad_norm": 1.6713635921478271,
+      "learning_rate": 1.9534753884805063e-05,
+      "loss": 0.1221,
+      "step": 2500
+    },
+    {
+      "epoch": 0.13957383455848144,
+      "grad_norm": 0.4952755570411682,
+      "learning_rate": 1.9441704661766077e-05,
+      "loss": 0.116,
+      "step": 3000
+    },
+    {
+      "epoch": 0.16283614031822835,
+      "grad_norm": 0.5051670670509338,
+      "learning_rate": 1.9348655438727088e-05,
+      "loss": 0.1147,
+      "step": 3500
+    },
+    {
+      "epoch": 0.18609844607797524,
+      "grad_norm": 0.9887399077415466,
+      "learning_rate": 1.92556062156881e-05,
+      "loss": 0.1152,
+      "step": 4000
+    },
+    {
+      "epoch": 0.20936075183772215,
+      "grad_norm": 0.7931112051010132,
+      "learning_rate": 1.9162556992649114e-05,
+      "loss": 0.1138,
+      "step": 4500
+    },
+    {
+      "epoch": 0.23262305759746907,
+      "grad_norm": 0.7627053260803223,
+      "learning_rate": 1.9069507769610124e-05,
+      "loss": 0.113,
+      "step": 5000
+    },
+    {
+      "epoch": 0.25588536335721596,
+      "grad_norm": 0.6043164730072021,
+      "learning_rate": 1.897645854657114e-05,
+      "loss": 0.1119,
+      "step": 5500
+    },
+    {
+      "epoch": 0.27914766911696287,
+      "grad_norm": 0.5762751698493958,
+      "learning_rate": 1.888340932353215e-05,
+      "loss": 0.1107,
+      "step": 6000
+    },
+    {
+      "epoch": 0.3024099748767098,
+      "grad_norm": 0.6853972673416138,
+      "learning_rate": 1.8790360100493164e-05,
+      "loss": 0.1075,
+      "step": 6500
+    },
+    {
+      "epoch": 0.3256722806364567,
+      "grad_norm": 0.6911000609397888,
+      "learning_rate": 1.8697310877454175e-05,
+      "loss": 0.1079,
+      "step": 7000
+    },
+    {
+      "epoch": 0.3489345863962036,
+      "grad_norm": 0.5828253626823425,
+      "learning_rate": 1.860426165441519e-05,
+      "loss": 0.1091,
+      "step": 7500
+    },
+    {
+      "epoch": 0.3721968921559505,
+      "grad_norm": 0.782696008682251,
+      "learning_rate": 1.85112124313762e-05,
+      "loss": 0.1084,
+      "step": 8000
+    },
+    {
+      "epoch": 0.3954591979156974,
+      "grad_norm": 0.8830183148384094,
+      "learning_rate": 1.841816320833721e-05,
+      "loss": 0.1059,
+      "step": 8500
+    },
+    {
+      "epoch": 0.4187215036754443,
+      "grad_norm": 1.4506700038909912,
+      "learning_rate": 1.8325113985298225e-05,
+      "loss": 0.1058,
+      "step": 9000
+    },
+    {
+      "epoch": 0.4419838094351912,
+      "grad_norm": 1.0449475049972534,
+      "learning_rate": 1.8232064762259236e-05,
+      "loss": 0.1067,
+      "step": 9500
+    },
+    {
+      "epoch": 0.46524611519493814,
+      "grad_norm": 1.4629307985305786,
+      "learning_rate": 1.8139015539220247e-05,
+      "loss": 0.1031,
+      "step": 10000
+    },
+    {
+      "epoch": 0.48850842095468505,
+      "grad_norm": 0.5856618285179138,
+      "learning_rate": 1.804596631618126e-05,
+      "loss": 0.1064,
+      "step": 10500
+    },
+    {
+      "epoch": 0.5117707267144319,
+      "grad_norm": 0.7551602125167847,
+      "learning_rate": 1.7952917093142276e-05,
+      "loss": 0.1029,
+      "step": 11000
+    },
+    {
+      "epoch": 0.5350330324741789,
+      "grad_norm": 0.762008011341095,
+      "learning_rate": 1.7859867870103287e-05,
+      "loss": 0.1043,
+      "step": 11500
+    },
+    {
+      "epoch": 0.5582953382339257,
+      "grad_norm": 0.9580531716346741,
+      "learning_rate": 1.7766818647064298e-05,
+      "loss": 0.1048,
+      "step": 12000
+    },
+    {
+      "epoch": 0.5815576439936726,
+      "grad_norm": 1.0684598684310913,
+      "learning_rate": 1.7673769424025312e-05,
+      "loss": 0.1041,
+      "step": 12500
+    },
+    {
+      "epoch": 0.6048199497534196,
+      "grad_norm": 1.6208114624023438,
+      "learning_rate": 1.7580720200986323e-05,
+      "loss": 0.1005,
+      "step": 13000
+    },
+    {
+      "epoch": 0.6280822555131664,
+      "grad_norm": 1.0143775939941406,
+      "learning_rate": 1.7487670977947334e-05,
+      "loss": 0.1012,
+      "step": 13500
+    },
+    {
+      "epoch": 0.6513445612729134,
+      "grad_norm": 0.8733798861503601,
+      "learning_rate": 1.7394621754908348e-05,
+      "loss": 0.1009,
+      "step": 14000
+    },
+    {
+      "epoch": 0.6746068670326603,
+      "grad_norm": 1.1354494094848633,
+      "learning_rate": 1.730157253186936e-05,
+      "loss": 0.1012,
+      "step": 14500
+    },
+    {
+      "epoch": 0.6978691727924072,
+      "grad_norm": 0.852428674697876,
+      "learning_rate": 1.720852330883037e-05,
+      "loss": 0.1003,
+      "step": 15000
+    },
+    {
+      "epoch": 0.7211314785521541,
+      "grad_norm": 0.6298101544380188,
+      "learning_rate": 1.7115474085791384e-05,
+      "loss": 0.1009,
+      "step": 15500
+    },
+    {
+      "epoch": 0.744393784311901,
+      "grad_norm": 0.6968585252761841,
+      "learning_rate": 1.70224248627524e-05,
+      "loss": 0.1002,
+      "step": 16000
+    },
+    {
+      "epoch": 0.7676560900716479,
+      "grad_norm": 0.7687616944313049,
+      "learning_rate": 1.692937563971341e-05,
+      "loss": 0.0976,
+      "step": 16500
+    },
+    {
+      "epoch": 0.7909183958313948,
+      "grad_norm": 0.9958521723747253,
+      "learning_rate": 1.6836326416674424e-05,
+      "loss": 0.0996,
+      "step": 17000
+    },
+    {
+      "epoch": 0.8141807015911418,
+      "grad_norm": 1.9353946447372437,
+      "learning_rate": 1.6743277193635435e-05,
+      "loss": 0.0982,
+      "step": 17500
+    },
+    {
+      "epoch": 0.8374430073508886,
+      "grad_norm": 0.4970337450504303,
+      "learning_rate": 1.6650227970596446e-05,
+      "loss": 0.1013,
+      "step": 18000
+    },
+    {
+      "epoch": 0.8607053131106355,
+      "grad_norm": 1.3484145402908325,
+      "learning_rate": 1.655717874755746e-05,
+      "loss": 0.1001,
+      "step": 18500
+    },
+    {
+      "epoch": 0.8839676188703824,
+      "grad_norm": 0.9516633749008179,
+      "learning_rate": 1.646412952451847e-05,
+      "loss": 0.0978,
+      "step": 19000
+    },
+    {
+      "epoch": 0.9072299246301293,
+      "grad_norm": 1.358478307723999,
+      "learning_rate": 1.6371080301479485e-05,
+      "loss": 0.1016,
+      "step": 19500
+    },
+    {
+      "epoch": 0.9304922303898763,
+      "grad_norm": 0.5643423795700073,
+      "learning_rate": 1.6278031078440496e-05,
+      "loss": 0.0983,
+      "step": 20000
+    },
+    {
+      "epoch": 0.9537545361496231,
+      "grad_norm": 0.532564103603363,
+      "learning_rate": 1.618498185540151e-05,
+      "loss": 0.0972,
+      "step": 20500
+    },
+    {
+      "epoch": 0.9770168419093701,
+      "grad_norm": 1.3922828435897827,
+      "learning_rate": 1.609193263236252e-05,
+      "loss": 0.0976,
+      "step": 21000
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.5307062436028659,
+      "eval_f1": 0.7361535927721048,
+      "eval_loss": 0.09595564752817154,
+      "eval_roc_auc": 0.8461935311283144,
+      "eval_runtime": 24.262,
+      "eval_samples_per_second": 885.911,
+      "eval_steps_per_second": 110.749,
+      "step": 21494
+    },
+    {
+      "epoch": 1.0002791476691169,
+      "grad_norm": 0.6412343978881836,
+      "learning_rate": 1.5998883409323532e-05,
+      "loss": 0.0986,
+      "step": 21500
+    },
+    {
+      "epoch": 1.0235414534288638,
+      "grad_norm": 0.6500938534736633,
+      "learning_rate": 1.5905834186284547e-05,
+      "loss": 0.0889,
+      "step": 22000
+    },
+    {
+      "epoch": 1.0468037591886108,
+      "grad_norm": 0.49381542205810547,
+      "learning_rate": 1.5812784963245558e-05,
+      "loss": 0.0861,
+      "step": 22500
+    },
+    {
+      "epoch": 1.0700660649483578,
+      "grad_norm": 0.790490448474884,
+      "learning_rate": 1.571973574020657e-05,
+      "loss": 0.0853,
+      "step": 23000
+    },
+    {
+      "epoch": 1.0933283707081045,
+      "grad_norm": 1.0638964176177979,
+      "learning_rate": 1.5626686517167583e-05,
+      "loss": 0.0855,
+      "step": 23500
+    },
+    {
+      "epoch": 1.1165906764678515,
+      "grad_norm": 0.8379644751548767,
+      "learning_rate": 1.5533637294128597e-05,
+      "loss": 0.0839,
+      "step": 24000
+    },
+    {
+      "epoch": 1.1398529822275985,
+      "grad_norm": 1.5022120475769043,
+      "learning_rate": 1.5440588071089608e-05,
+      "loss": 0.0883,
+      "step": 24500
+    },
+    {
+      "epoch": 1.1631152879873454,
+      "grad_norm": 0.6825814247131348,
+      "learning_rate": 1.534753884805062e-05,
+      "loss": 0.0852,
+      "step": 25000
+    },
+    {
+      "epoch": 1.1863775937470922,
+      "grad_norm": 2.154118776321411,
+      "learning_rate": 1.5254489625011633e-05,
+      "loss": 0.0854,
+      "step": 25500
+    },
+    {
+      "epoch": 1.2096398995068391,
+      "grad_norm": 0.8945685029029846,
+      "learning_rate": 1.5161440401972644e-05,
+      "loss": 0.0862,
+      "step": 26000
+    },
+    {
+      "epoch": 1.2329022052665861,
+      "grad_norm": 1.1257520914077759,
+      "learning_rate": 1.5068391178933657e-05,
+      "loss": 0.0832,
+      "step": 26500
+    },
+    {
+      "epoch": 1.2561645110263329,
+      "grad_norm": 0.785380482673645,
+      "learning_rate": 1.497534195589467e-05,
+      "loss": 0.0842,
+      "step": 27000
+    },
+    {
+      "epoch": 1.2794268167860798,
+      "grad_norm": 0.4283202290534973,
+      "learning_rate": 1.488229273285568e-05,
+      "loss": 0.0859,
+      "step": 27500
+    },
+    {
+      "epoch": 1.3026891225458268,
+      "grad_norm": 1.3082115650177002,
+      "learning_rate": 1.4789243509816695e-05,
+      "loss": 0.0824,
+      "step": 28000
+    },
+    {
+      "epoch": 1.3259514283055736,
+      "grad_norm": 0.6663931608200073,
+      "learning_rate": 1.4696194286777707e-05,
+      "loss": 0.0868,
+      "step": 28500
+    },
+    {
+      "epoch": 1.3492137340653205,
+      "grad_norm": 1.093483805656433,
+      "learning_rate": 1.460314506373872e-05,
+      "loss": 0.0871,
+      "step": 29000
+    },
+    {
+      "epoch": 1.3724760398250675,
+      "grad_norm": 1.7370342016220093,
+      "learning_rate": 1.4510095840699731e-05,
+      "loss": 0.0858,
+      "step": 29500
+    },
+    {
+      "epoch": 1.3957383455848142,
+      "grad_norm": 1.280945062637329,
+      "learning_rate": 1.4417046617660744e-05,
+      "loss": 0.0853,
+      "step": 30000
+    },
+    {
+      "epoch": 1.4190006513445612,
+      "grad_norm": 0.44828563928604126,
+      "learning_rate": 1.4323997394621756e-05,
+      "loss": 0.0828,
+      "step": 30500
+    },
+    {
+      "epoch": 1.4422629571043082,
+      "grad_norm": 1.0420727729797363,
+      "learning_rate": 1.4230948171582769e-05,
+      "loss": 0.0836,
+      "step": 31000
+    },
+    {
+      "epoch": 1.4655252628640552,
+      "grad_norm": 1.529417634010315,
+      "learning_rate": 1.413789894854378e-05,
+      "loss": 0.0827,
+      "step": 31500
+    },
+    {
+      "epoch": 1.4887875686238021,
+      "grad_norm": 0.8359895944595337,
+      "learning_rate": 1.4044849725504792e-05,
+      "loss": 0.0836,
+      "step": 32000
+    },
+    {
+      "epoch": 1.5120498743835489,
+      "grad_norm": 0.8840139508247375,
+      "learning_rate": 1.3951800502465807e-05,
+      "loss": 0.0858,
+      "step": 32500
+    },
+    {
+      "epoch": 1.5353121801432958,
+      "grad_norm": 1.2289206981658936,
+      "learning_rate": 1.385875127942682e-05,
+      "loss": 0.0835,
+      "step": 33000
+    },
+    {
+      "epoch": 1.5585744859030428,
+      "grad_norm": 1.725092887878418,
+      "learning_rate": 1.376570205638783e-05,
+      "loss": 0.0833,
+      "step": 33500
+    },
+    {
+      "epoch": 1.5818367916627896,
+      "grad_norm": 1.8183008432388306,
+      "learning_rate": 1.3672652833348843e-05,
+      "loss": 0.085,
+      "step": 34000
+    },
+    {
+      "epoch": 1.6050990974225365,
+      "grad_norm": 1.5482715368270874,
+      "learning_rate": 1.3579603610309855e-05,
+      "loss": 0.0825,
+      "step": 34500
+    },
+    {
+      "epoch": 1.6283614031822835,
+      "grad_norm": 2.1967756748199463,
+      "learning_rate": 1.3486554387270866e-05,
+      "loss": 0.0857,
+      "step": 35000
+    },
+    {
+      "epoch": 1.6516237089420303,
+      "grad_norm": 0.9423213005065918,
+      "learning_rate": 1.3393505164231879e-05,
+      "loss": 0.0818,
+      "step": 35500
+    },
+    {
+      "epoch": 1.6748860147017772,
+      "grad_norm": 0.8981990218162537,
+      "learning_rate": 1.3300455941192892e-05,
+      "loss": 0.0821,
+      "step": 36000
+    },
+    {
+      "epoch": 1.6981483204615242,
+      "grad_norm": 1.125404715538025,
+      "learning_rate": 1.3207406718153904e-05,
+      "loss": 0.0849,
+      "step": 36500
+    },
+    {
+      "epoch": 1.721410626221271,
+      "grad_norm": 2.038687229156494,
+      "learning_rate": 1.3114357495114918e-05,
+      "loss": 0.0847,
+      "step": 37000
+    },
+    {
+      "epoch": 1.744672931981018,
+      "grad_norm": 1.0153677463531494,
+      "learning_rate": 1.302130827207593e-05,
+      "loss": 0.084,
+      "step": 37500
+    },
+    {
+      "epoch": 1.7679352377407649,
+      "grad_norm": 1.0724354982376099,
+      "learning_rate": 1.2928259049036942e-05,
+      "loss": 0.0825,
+      "step": 38000
+    },
+    {
+      "epoch": 1.7911975435005116,
+      "grad_norm": 0.6580795645713806,
+      "learning_rate": 1.2835209825997955e-05,
+      "loss": 0.0843,
+      "step": 38500
+    },
+    {
+      "epoch": 1.8144598492602588,
+      "grad_norm": 1.5284615755081177,
+      "learning_rate": 1.2742160602958966e-05,
+      "loss": 0.0858,
+      "step": 39000
+    },
+    {
+      "epoch": 1.8377221550200056,
+      "grad_norm": 2.040937900543213,
+      "learning_rate": 1.2649111379919978e-05,
+      "loss": 0.0838,
+      "step": 39500
+    },
+    {
+      "epoch": 1.8609844607797523,
+      "grad_norm": 1.1230378150939941,
+      "learning_rate": 1.255606215688099e-05,
+      "loss": 0.0822,
+      "step": 40000
+    },
+    {
+      "epoch": 1.8842467665394995,
+      "grad_norm": 0.6881332397460938,
+      "learning_rate": 1.2463012933842002e-05,
+      "loss": 0.0827,
+      "step": 40500
+    },
+    {
+      "epoch": 1.9075090722992463,
+      "grad_norm": 1.4516489505767822,
+      "learning_rate": 1.2369963710803016e-05,
+      "loss": 0.0797,
+      "step": 41000
+    },
+    {
+      "epoch": 1.9307713780589932,
+      "grad_norm": 0.8135964870452881,
+      "learning_rate": 1.2276914487764029e-05,
+      "loss": 0.0823,
+      "step": 41500
+    },
+    {
+      "epoch": 1.9540336838187402,
+      "grad_norm": 0.51099693775177,
+      "learning_rate": 1.2183865264725041e-05,
+      "loss": 0.0819,
+      "step": 42000
+    },
+    {
+      "epoch": 1.977295989578487,
+      "grad_norm": 1.2015933990478516,
+      "learning_rate": 1.2090816041686054e-05,
+      "loss": 0.0826,
+      "step": 42500
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.5615520610402903,
+      "eval_f1": 0.7499260228316903,
+      "eval_loss": 0.09278739243745804,
+      "eval_roc_auc": 0.8556249389001388,
+      "eval_runtime": 23.2564,
+      "eval_samples_per_second": 924.221,
+      "eval_steps_per_second": 115.538,
+      "step": 42988
+    },
+    {
+      "epoch": 2.0005582953382337,
+      "grad_norm": 1.1591126918792725,
+      "learning_rate": 1.1997766818647065e-05,
+      "loss": 0.0807,
+      "step": 43000
+    },
+    {
+      "epoch": 2.023820601097981,
+      "grad_norm": 0.6749496459960938,
+      "learning_rate": 1.1904717595608077e-05,
+      "loss": 0.07,
+      "step": 43500
+    },
+    {
+      "epoch": 2.0470829068577276,
+      "grad_norm": 1.0793603658676147,
+      "learning_rate": 1.181166837256909e-05,
+      "loss": 0.0682,
+      "step": 44000
+    },
+    {
+      "epoch": 2.070345212617475,
+      "grad_norm": 1.9193094968795776,
+      "learning_rate": 1.1718619149530101e-05,
+      "loss": 0.0659,
+      "step": 44500
+    },
+    {
+      "epoch": 2.0936075183772216,
+      "grad_norm": 2.0090835094451904,
+      "learning_rate": 1.1625569926491114e-05,
+      "loss": 0.0661,
+      "step": 45000
+    },
+    {
+      "epoch": 2.1168698241369683,
+      "grad_norm": 1.252321720123291,
+      "learning_rate": 1.1532520703452128e-05,
+      "loss": 0.0663,
+      "step": 45500
+    },
+    {
+      "epoch": 2.1401321298967155,
+      "grad_norm": 0.25603464245796204,
+      "learning_rate": 1.143947148041314e-05,
+      "loss": 0.0671,
+      "step": 46000
+    },
+    {
+      "epoch": 2.1633944356564623,
+      "grad_norm": 0.9230429530143738,
+      "learning_rate": 1.1346422257374151e-05,
+      "loss": 0.0654,
+      "step": 46500
+    },
+    {
+      "epoch": 2.186656741416209,
+      "grad_norm": 0.8180581331253052,
+      "learning_rate": 1.1253373034335164e-05,
+      "loss": 0.0675,
+      "step": 47000
+    },
+    {
+      "epoch": 2.209919047175956,
+      "grad_norm": 1.928276777267456,
+      "learning_rate": 1.1160323811296177e-05,
+      "loss": 0.0686,
+      "step": 47500
+    },
+    {
+      "epoch": 2.233181352935703,
+      "grad_norm": 1.222936987876892,
+      "learning_rate": 1.106727458825719e-05,
+      "loss": 0.0706,
+      "step": 48000
+    },
+    {
+      "epoch": 2.2564436586954497,
+      "grad_norm": 0.6796595454216003,
+      "learning_rate": 1.09742253652182e-05,
+      "loss": 0.0681,
+      "step": 48500
+    },
+    {
+      "epoch": 2.279705964455197,
+      "grad_norm": 1.2472426891326904,
+      "learning_rate": 1.0881176142179213e-05,
+      "loss": 0.0677,
+      "step": 49000
+    },
+    {
+      "epoch": 2.3029682702149437,
+      "grad_norm": 1.2157268524169922,
+      "learning_rate": 1.0788126919140227e-05,
+      "loss": 0.0688,
+      "step": 49500
+    },
+    {
+      "epoch": 2.326230575974691,
+      "grad_norm": 1.0946940183639526,
+      "learning_rate": 1.069507769610124e-05,
+      "loss": 0.0669,
+      "step": 50000
+    },
+    {
+      "epoch": 2.3494928817344376,
+      "grad_norm": 2.56750750541687,
+      "learning_rate": 1.060202847306225e-05,
+      "loss": 0.065,
+      "step": 50500
+    },
+    {
+      "epoch": 2.3727551874941843,
+      "grad_norm": 0.8625161051750183,
+      "learning_rate": 1.0508979250023263e-05,
+      "loss": 0.0676,
+      "step": 51000
+    },
+    {
+      "epoch": 2.3960174932539315,
+      "grad_norm": 1.6813982725143433,
+      "learning_rate": 1.0415930026984276e-05,
+      "loss": 0.0669,
+      "step": 51500
+    },
+    {
+      "epoch": 2.4192797990136783,
+      "grad_norm": 1.76870596408844,
+      "learning_rate": 1.0322880803945287e-05,
+      "loss": 0.0704,
+      "step": 52000
+    },
+    {
+      "epoch": 2.442542104773425,
+      "grad_norm": 0.8901593089103699,
+      "learning_rate": 1.02298315809063e-05,
+      "loss": 0.0703,
+      "step": 52500
+    },
+    {
+      "epoch": 2.4658044105331722,
+      "grad_norm": 0.7651998400688171,
+      "learning_rate": 1.0136782357867312e-05,
+      "loss": 0.0685,
+      "step": 53000
+    },
+    {
+      "epoch": 2.489066716292919,
+      "grad_norm": 1.2652794122695923,
+      "learning_rate": 1.0043733134828325e-05,
+      "loss": 0.0653,
+      "step": 53500
+    },
+    {
+      "epoch": 2.5123290220526657,
+      "grad_norm": 1.768955111503601,
+      "learning_rate": 9.950683911789337e-06,
+      "loss": 0.0686,
+      "step": 54000
+    },
+    {
+      "epoch": 2.535591327812413,
+      "grad_norm": 1.6044102907180786,
+      "learning_rate": 9.85763468875035e-06,
+      "loss": 0.0676,
+      "step": 54500
+    },
+    {
+      "epoch": 2.5588536335721597,
+      "grad_norm": 1.808396816253662,
+      "learning_rate": 9.764585465711363e-06,
+      "loss": 0.0662,
+      "step": 55000
+    },
+    {
+      "epoch": 2.5821159393319064,
+      "grad_norm": 1.0778286457061768,
+      "learning_rate": 9.671536242672375e-06,
+      "loss": 0.0689,
+      "step": 55500
+    },
+    {
+      "epoch": 2.6053782450916536,
+      "grad_norm": 2.739319324493408,
+      "learning_rate": 9.578487019633386e-06,
+      "loss": 0.068,
+      "step": 56000
+    },
+    {
+      "epoch": 2.6286405508514004,
+      "grad_norm": 1.368030071258545,
+      "learning_rate": 9.485437796594399e-06,
+      "loss": 0.0655,
+      "step": 56500
+    },
+    {
+      "epoch": 2.651902856611147,
+      "grad_norm": 0.30945539474487305,
+      "learning_rate": 9.392388573555411e-06,
+      "loss": 0.0649,
+      "step": 57000
+    },
+    {
+      "epoch": 2.6751651623708943,
+      "grad_norm": 0.8296416997909546,
+      "learning_rate": 9.299339350516424e-06,
+      "loss": 0.0677,
+      "step": 57500
+    },
+    {
+      "epoch": 2.698427468130641,
+      "grad_norm": 1.1620192527770996,
+      "learning_rate": 9.206290127477437e-06,
+      "loss": 0.069,
+      "step": 58000
+    },
+    {
+      "epoch": 2.721689773890388,
+      "grad_norm": 0.9376591444015503,
+      "learning_rate": 9.11324090443845e-06,
+      "loss": 0.0662,
+      "step": 58500
+    },
+    {
+      "epoch": 2.744952079650135,
+      "grad_norm": 1.6231029033660889,
+      "learning_rate": 9.02019168139946e-06,
+      "loss": 0.0674,
+      "step": 59000
+    },
+    {
+      "epoch": 2.7682143854098817,
+      "grad_norm": 1.0340408086776733,
+      "learning_rate": 8.927142458360474e-06,
+      "loss": 0.0685,
+      "step": 59500
+    },
+    {
+      "epoch": 2.7914766911696285,
+      "grad_norm": 1.5797666311264038,
+      "learning_rate": 8.834093235321485e-06,
+      "loss": 0.065,
+      "step": 60000
+    },
+    {
+      "epoch": 2.8147389969293757,
+      "grad_norm": 0.9955604076385498,
+      "learning_rate": 8.741044012282498e-06,
+      "loss": 0.0682,
+      "step": 60500
+    },
+    {
+      "epoch": 2.8380013026891224,
+      "grad_norm": 2.2507500648498535,
+      "learning_rate": 8.64799478924351e-06,
+      "loss": 0.0651,
+      "step": 61000
+    },
+    {
+      "epoch": 2.861263608448869,
+      "grad_norm": 0.9272844791412354,
+      "learning_rate": 8.554945566204523e-06,
+      "loss": 0.0651,
+      "step": 61500
+    },
+    {
+      "epoch": 2.8845259142086164,
+      "grad_norm": 1.3886868953704834,
+      "learning_rate": 8.461896343165536e-06,
+      "loss": 0.066,
+      "step": 62000
+    },
+    {
+      "epoch": 2.907788219968363,
+      "grad_norm": 0.9660001397132874,
+      "learning_rate": 8.368847120126547e-06,
+      "loss": 0.0683,
+      "step": 62500
+    },
+    {
+      "epoch": 2.9310505257281103,
+      "grad_norm": 0.8844442963600159,
+      "learning_rate": 8.27579789708756e-06,
+      "loss": 0.0671,
+      "step": 63000
+    },
+    {
+      "epoch": 2.954312831487857,
+      "grad_norm": 2.407435417175293,
+      "learning_rate": 8.182748674048572e-06,
+      "loss": 0.0664,
+      "step": 63500
+    },
+    {
+      "epoch": 2.9775751372476043,
+      "grad_norm": 2.187854528427124,
+      "learning_rate": 8.089699451009585e-06,
+      "loss": 0.0666,
+      "step": 64000
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.5649948822927329,
+      "eval_f1": 0.7591117292255597,
+      "eval_loss": 0.09460150450468063,
+      "eval_roc_auc": 0.8701062840131171,
+      "eval_runtime": 23.3037,
+      "eval_samples_per_second": 922.345,
+      "eval_steps_per_second": 115.304,
+      "step": 64482
+    },
+    {
+      "epoch": 3.000837443007351,
+      "grad_norm": 1.4572051763534546,
+      "learning_rate": 7.996650227970597e-06,
+      "loss": 0.0662,
+      "step": 64500
+    },
+    {
+      "epoch": 3.0240997487670978,
+      "grad_norm": 1.0595591068267822,
+      "learning_rate": 7.90360100493161e-06,
+      "loss": 0.0532,
+      "step": 65000
+    },
+    {
+      "epoch": 3.0473620545268445,
+      "grad_norm": 0.7926930785179138,
+      "learning_rate": 7.81055178189262e-06,
+      "loss": 0.0513,
+      "step": 65500
+    },
+    {
+      "epoch": 3.0706243602865917,
+      "grad_norm": 2.0223031044006348,
+      "learning_rate": 7.717502558853635e-06,
+      "loss": 0.056,
+      "step": 66000
+    },
+    {
+      "epoch": 3.0938866660463384,
+      "grad_norm": 1.3608500957489014,
+      "learning_rate": 7.624453335814647e-06,
+      "loss": 0.051,
+      "step": 66500
+    },
+    {
+      "epoch": 3.1171489718060856,
+      "grad_norm": 1.0539377927780151,
+      "learning_rate": 7.531404112775659e-06,
+      "loss": 0.0535,
+      "step": 67000
+    },
+    {
+      "epoch": 3.1404112775658324,
+      "grad_norm": 0.8993151187896729,
+      "learning_rate": 7.4383548897366704e-06,
+      "loss": 0.0529,
+      "step": 67500
+    },
+    {
+      "epoch": 3.163673583325579,
+      "grad_norm": 1.5115621089935303,
+      "learning_rate": 7.345305666697684e-06,
+      "loss": 0.0512,
+      "step": 68000
+    },
+    {
+      "epoch": 3.1869358890853263,
+      "grad_norm": 0.8718969225883484,
+      "learning_rate": 7.2522564436586965e-06,
+      "loss": 0.0517,
+      "step": 68500
+    },
+    {
+      "epoch": 3.210198194845073,
+      "grad_norm": 1.0791226625442505,
+      "learning_rate": 7.159207220619708e-06,
+      "loss": 0.0512,
+      "step": 69000
+    },
+    {
+      "epoch": 3.23346050060482,
+      "grad_norm": 1.2362322807312012,
+      "learning_rate": 7.06615799758072e-06,
+      "loss": 0.0505,
+      "step": 69500
+    },
+    {
+      "epoch": 3.256722806364567,
+      "grad_norm": 0.4878983497619629,
+      "learning_rate": 6.9731087745417335e-06,
+      "loss": 0.0523,
+      "step": 70000
+    },
+    {
+      "epoch": 3.2799851121243138,
+      "grad_norm": 0.5156907439231873,
+      "learning_rate": 6.880059551502746e-06,
+      "loss": 0.0522,
+      "step": 70500
+    },
+    {
+      "epoch": 3.3032474178840605,
+      "grad_norm": 1.0036829710006714,
+      "learning_rate": 6.787010328463758e-06,
+      "loss": 0.053,
+      "step": 71000
+    },
+    {
+      "epoch": 3.3265097236438077,
+      "grad_norm": 1.9383690357208252,
+      "learning_rate": 6.69396110542477e-06,
+      "loss": 0.0524,
+      "step": 71500
+    },
+    {
+      "epoch": 3.3497720294035545,
+      "grad_norm": 0.9468953609466553,
+      "learning_rate": 6.600911882385782e-06,
+      "loss": 0.0507,
+      "step": 72000
+    },
+    {
+      "epoch": 3.373034335163301,
+      "grad_norm": 1.7953852415084839,
+      "learning_rate": 6.507862659346795e-06,
+      "loss": 0.0536,
+      "step": 72500
+    },
+    {
+      "epoch": 3.3962966409230484,
+      "grad_norm": 3.1470677852630615,
+      "learning_rate": 6.4148134363078075e-06,
+      "loss": 0.0521,
+      "step": 73000
+    },
+    {
+      "epoch": 3.419558946682795,
+      "grad_norm": 2.5121142864227295,
+      "learning_rate": 6.321764213268819e-06,
+      "loss": 0.0513,
+      "step": 73500
+    },
+    {
+      "epoch": 3.442821252442542,
+      "grad_norm": 1.0771255493164062,
+      "learning_rate": 6.228714990229832e-06,
+      "loss": 0.0529,
+      "step": 74000
+    },
+    {
+      "epoch": 3.466083558202289,
+      "grad_norm": 1.3458467721939087,
+      "learning_rate": 6.1356657671908446e-06,
+      "loss": 0.053,
+      "step": 74500
+    },
+    {
+      "epoch": 3.489345863962036,
+      "grad_norm": 0.29975369572639465,
+      "learning_rate": 6.042616544151857e-06,
+      "loss": 0.0512,
+      "step": 75000
+    },
+    {
+      "epoch": 3.5126081697217826,
+      "grad_norm": 1.2391622066497803,
+      "learning_rate": 5.949567321112869e-06,
+      "loss": 0.0519,
+      "step": 75500
+    },
+    {
+      "epoch": 3.5358704754815298,
+      "grad_norm": 1.6513882875442505,
+      "learning_rate": 5.8565180980738816e-06,
+      "loss": 0.0529,
+      "step": 76000
+    },
+    {
+      "epoch": 3.5591327812412765,
+      "grad_norm": 1.1643694639205933,
+      "learning_rate": 5.763468875034894e-06,
+      "loss": 0.0537,
+      "step": 76500
+    },
+    {
+      "epoch": 3.5823950870010233,
+      "grad_norm": 1.9166603088378906,
+      "learning_rate": 5.670419651995907e-06,
+      "loss": 0.0508,
+      "step": 77000
+    },
+    {
+      "epoch": 3.6056573927607705,
+      "grad_norm": 1.5334446430206299,
+      "learning_rate": 5.5773704289569186e-06,
+      "loss": 0.0514,
+      "step": 77500
+    },
+    {
+      "epoch": 3.628919698520517,
+      "grad_norm": 2.500365972518921,
+      "learning_rate": 5.48432120591793e-06,
+      "loss": 0.0537,
+      "step": 78000
+    },
+    {
+      "epoch": 3.6521820042802644,
+      "grad_norm": 0.8305968046188354,
+      "learning_rate": 5.391271982878944e-06,
+      "loss": 0.0546,
+      "step": 78500
+    },
+    {
+      "epoch": 3.675444310040011,
+      "grad_norm": 1.3438687324523926,
+      "learning_rate": 5.2982227598399564e-06,
+      "loss": 0.0536,
+      "step": 79000
+    },
+    {
+      "epoch": 3.6987066157997583,
+      "grad_norm": 1.1025956869125366,
+      "learning_rate": 5.205173536800968e-06,
+      "loss": 0.0546,
+      "step": 79500
+    },
+    {
+      "epoch": 3.721968921559505,
+      "grad_norm": 1.6700533628463745,
+      "learning_rate": 5.11212431376198e-06,
+      "loss": 0.0533,
+      "step": 80000
+    },
+    {
+      "epoch": 3.745231227319252,
+      "grad_norm": 0.8916147351264954,
+      "learning_rate": 5.019075090722993e-06,
+      "loss": 0.0516,
+      "step": 80500
+    },
+    {
+      "epoch": 3.768493533078999,
+      "grad_norm": 1.7522839307785034,
+      "learning_rate": 4.926025867684005e-06,
+      "loss": 0.0537,
+      "step": 81000
+    },
+    {
+      "epoch": 3.791755838838746,
+      "grad_norm": 1.4133764505386353,
+      "learning_rate": 4.832976644645018e-06,
+      "loss": 0.0542,
+      "step": 81500
+    },
+    {
+      "epoch": 3.8150181445984925,
+      "grad_norm": 0.9128021001815796,
+      "learning_rate": 4.73992742160603e-06,
+      "loss": 0.0516,
+      "step": 82000
+    },
+    {
+      "epoch": 3.8382804503582397,
+      "grad_norm": 2.4152848720550537,
+      "learning_rate": 4.646878198567042e-06,
+      "loss": 0.0532,
+      "step": 82500
+    },
+    {
+      "epoch": 3.8615427561179865,
+      "grad_norm": 1.5950450897216797,
+      "learning_rate": 4.553828975528055e-06,
+      "loss": 0.0523,
+      "step": 83000
+    },
+    {
+      "epoch": 3.8848050618777332,
+      "grad_norm": 1.5487794876098633,
+      "learning_rate": 4.4607797524890675e-06,
+      "loss": 0.054,
+      "step": 83500
+    },
+    {
+      "epoch": 3.9080673676374804,
+      "grad_norm": 1.6051621437072754,
+      "learning_rate": 4.367730529450079e-06,
+      "loss": 0.052,
+      "step": 84000
+    },
+    {
+      "epoch": 3.931329673397227,
+      "grad_norm": 1.2082515954971313,
+      "learning_rate": 4.274681306411092e-06,
+      "loss": 0.0512,
+      "step": 84500
+    },
+    {
+      "epoch": 3.954591979156974,
+      "grad_norm": 1.1482079029083252,
+      "learning_rate": 4.1816320833721045e-06,
+      "loss": 0.0514,
+      "step": 85000
+    },
+    {
+      "epoch": 3.977854284916721,
+      "grad_norm": 1.919583797454834,
+      "learning_rate": 4.088582860333117e-06,
+      "loss": 0.0543,
+      "step": 85500
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.5645761607890574,
+      "eval_f1": 0.759579488098438,
+      "eval_loss": 0.10311879962682724,
+      "eval_roc_auc": 0.8744104988949349,
+      "eval_runtime": 22.8736,
+      "eval_samples_per_second": 939.686,
+      "eval_steps_per_second": 117.472,
+      "step": 85976
+    },
+    {
+      "epoch": 4.001116590676467,
+      "grad_norm": 0.9219486713409424,
+      "learning_rate": 3.995533637294129e-06,
+      "loss": 0.0531,
+      "step": 86000
+    },
+    {
+      "epoch": 4.024378896436215,
+      "grad_norm": 0.5063530206680298,
+      "learning_rate": 3.902484414255141e-06,
+      "loss": 0.0431,
+      "step": 86500
+    },
+    {
+      "epoch": 4.047641202195962,
+      "grad_norm": 0.9435988068580627,
+      "learning_rate": 3.8094351912161537e-06,
+      "loss": 0.0419,
+      "step": 87000
+    },
+    {
+      "epoch": 4.070903507955709,
+      "grad_norm": 0.8546033501625061,
+      "learning_rate": 3.716385968177166e-06,
+      "loss": 0.0413,
+      "step": 87500
+    },
+    {
+      "epoch": 4.094165813715455,
+      "grad_norm": 1.6249778270721436,
+      "learning_rate": 3.6233367451381785e-06,
+      "loss": 0.0429,
+      "step": 88000
+    },
+    {
+      "epoch": 4.1174281194752025,
+      "grad_norm": 1.7502926588058472,
+      "learning_rate": 3.5302875220991907e-06,
+      "loss": 0.0395,
+      "step": 88500
+    },
+    {
+      "epoch": 4.14069042523495,
+      "grad_norm": 3.170189380645752,
+      "learning_rate": 3.4372382990602033e-06,
+      "loss": 0.0431,
+      "step": 89000
+    },
+    {
+      "epoch": 4.163952730994696,
+      "grad_norm": 1.2306873798370361,
+      "learning_rate": 3.3441890760212155e-06,
+      "loss": 0.0419,
+      "step": 89500
+    },
+    {
+      "epoch": 4.187215036754443,
+      "grad_norm": 2.3752849102020264,
+      "learning_rate": 3.251139852982228e-06,
+      "loss": 0.0426,
+      "step": 90000
+    },
+    {
+      "epoch": 4.21047734251419,
+      "grad_norm": 1.0530017614364624,
+      "learning_rate": 3.1580906299432403e-06,
+      "loss": 0.0406,
+      "step": 90500
+    },
+    {
+      "epoch": 4.233739648273937,
+      "grad_norm": 1.7250635623931885,
+      "learning_rate": 3.065041406904253e-06,
+      "loss": 0.0431,
+      "step": 91000
+    },
+    {
+      "epoch": 4.257001954033684,
+      "grad_norm": 0.6301759481430054,
+      "learning_rate": 2.9719921838652647e-06,
+      "loss": 0.0437,
+      "step": 91500
+    },
+    {
+      "epoch": 4.280264259793431,
+      "grad_norm": 2.9674508571624756,
+      "learning_rate": 2.8789429608262777e-06,
+      "loss": 0.0429,
+      "step": 92000
+    },
+    {
+      "epoch": 4.303526565553177,
+      "grad_norm": 1.3684778213500977,
+      "learning_rate": 2.7858937377872895e-06,
+      "loss": 0.0413,
+      "step": 92500
+    },
+    {
+      "epoch": 4.326788871312925,
+      "grad_norm": 2.5620508193969727,
+      "learning_rate": 2.692844514748302e-06,
+      "loss": 0.0425,
+      "step": 93000
+    },
+    {
+      "epoch": 4.350051177072672,
+      "grad_norm": 2.529858350753784,
+      "learning_rate": 2.5997952917093143e-06,
+      "loss": 0.0432,
+      "step": 93500
+    },
+    {
+      "epoch": 4.373313482832418,
+      "grad_norm": 1.6359411478042603,
+      "learning_rate": 2.5067460686703265e-06,
+      "loss": 0.0437,
+      "step": 94000
+    },
+    {
+      "epoch": 4.396575788592165,
+      "grad_norm": 1.6633356809616089,
+      "learning_rate": 2.413696845631339e-06,
+      "loss": 0.0415,
+      "step": 94500
+    },
+    {
+      "epoch": 4.419838094351912,
+      "grad_norm": 0.9840025901794434,
+      "learning_rate": 2.3206476225923518e-06,
+      "loss": 0.0387,
+      "step": 95000
+    },
+    {
+      "epoch": 4.443100400111659,
+      "grad_norm": 1.1913479566574097,
+      "learning_rate": 2.227598399553364e-06,
+      "loss": 0.0433,
+      "step": 95500
+    },
+    {
+      "epoch": 4.466362705871406,
+      "grad_norm": 0.9769937992095947,
+      "learning_rate": 2.1345491765143766e-06,
+      "loss": 0.042,
+      "step": 96000
+    },
+    {
+      "epoch": 4.489625011631153,
+      "grad_norm": 0.4699022173881531,
+      "learning_rate": 2.0414999534753888e-06,
+      "loss": 0.0416,
+      "step": 96500
+    },
+    {
+      "epoch": 4.512887317390899,
+      "grad_norm": 1.7338500022888184,
+      "learning_rate": 1.948450730436401e-06,
+      "loss": 0.0422,
+      "step": 97000
+    },
+    {
+      "epoch": 4.536149623150647,
+      "grad_norm": 2.9296669960021973,
+      "learning_rate": 1.8554015073974132e-06,
+      "loss": 0.041,
+      "step": 97500
+    },
+    {
+      "epoch": 4.559411928910394,
+      "grad_norm": 2.108750820159912,
+      "learning_rate": 1.7623522843584256e-06,
+      "loss": 0.0406,
+      "step": 98000
+    },
+    {
+      "epoch": 4.58267423467014,
+      "grad_norm": 1.38349449634552,
+      "learning_rate": 1.669303061319438e-06,
+      "loss": 0.0403,
+      "step": 98500
+    },
+    {
+      "epoch": 4.605936540429887,
+      "grad_norm": 1.092578411102295,
+      "learning_rate": 1.5762538382804504e-06,
+      "loss": 0.0419,
+      "step": 99000
+    },
+    {
+      "epoch": 4.6291988461896345,
+      "grad_norm": 2.6619553565979004,
+      "learning_rate": 1.4832046152414628e-06,
+      "loss": 0.0394,
+      "step": 99500
+    },
+    {
+      "epoch": 4.652461151949382,
+      "grad_norm": 1.8110424280166626,
+      "learning_rate": 1.3901553922024752e-06,
+      "loss": 0.0407,
+      "step": 100000
+    },
+    {
+      "epoch": 4.675723457709128,
+      "grad_norm": 1.2239103317260742,
+      "learning_rate": 1.2971061691634876e-06,
+      "loss": 0.0415,
+      "step": 100500
+    },
+    {
+      "epoch": 4.698985763468875,
+      "grad_norm": 3.0208041667938232,
+      "learning_rate": 1.2040569461245e-06,
+      "loss": 0.0411,
+      "step": 101000
+    },
+    {
+      "epoch": 4.7222480692286215,
+      "grad_norm": 1.5058140754699707,
+      "learning_rate": 1.1110077230855124e-06,
+      "loss": 0.0431,
+      "step": 101500
+    },
+    {
+      "epoch": 4.745510374988369,
+      "grad_norm": 1.6732498407363892,
+      "learning_rate": 1.0179585000465248e-06,
+      "loss": 0.0408,
+      "step": 102000
+    },
+    {
+      "epoch": 4.768772680748116,
+      "grad_norm": 2.15928053855896,
+      "learning_rate": 9.249092770075371e-07,
+      "loss": 0.0412,
+      "step": 102500
+    },
+    {
+      "epoch": 4.792034986507863,
+      "grad_norm": 1.8211805820465088,
+      "learning_rate": 8.318600539685494e-07,
+      "loss": 0.0418,
+      "step": 103000
+    },
+    {
+      "epoch": 4.815297292267609,
+      "grad_norm": 1.1392755508422852,
+      "learning_rate": 7.388108309295617e-07,
+      "loss": 0.0395,
+      "step": 103500
+    },
+    {
+      "epoch": 4.838559598027357,
+      "grad_norm": 1.2640013694763184,
+      "learning_rate": 6.457616078905741e-07,
+      "loss": 0.0404,
+      "step": 104000
+    },
+    {
+      "epoch": 4.861821903787103,
+      "grad_norm": 1.2413549423217773,
+      "learning_rate": 5.527123848515865e-07,
+      "loss": 0.0409,
+      "step": 104500
+    },
+    {
+      "epoch": 4.88508420954685,
+      "grad_norm": 0.14875428378582,
+      "learning_rate": 4.5966316181259895e-07,
+      "loss": 0.0405,
+      "step": 105000
+    },
+    {
+      "epoch": 4.908346515306597,
+      "grad_norm": 1.309793472290039,
+      "learning_rate": 3.6661393877361125e-07,
+      "loss": 0.041,
+      "step": 105500
+    },
+    {
+      "epoch": 4.9316088210663445,
+      "grad_norm": 0.9020711779594421,
+      "learning_rate": 2.7356471573462365e-07,
+      "loss": 0.0402,
+      "step": 106000
+    },
+    {
+      "epoch": 4.954871126826091,
+      "grad_norm": 1.875877857208252,
+      "learning_rate": 1.80515492695636e-07,
+      "loss": 0.0408,
+      "step": 106500
+    },
+    {
+      "epoch": 4.978133432585838,
+      "grad_norm": 3.464327335357666,
+      "learning_rate": 8.746626965664838e-08,
+      "loss": 0.0407,
+      "step": 107000
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.5647157346236159,
+      "eval_f1": 0.7594581273430615,
+      "eval_loss": 0.1101851612329483,
+      "eval_roc_auc": 0.8742792491333411,
+      "eval_runtime": 22.5758,
+      "eval_samples_per_second": 952.08,
+      "eval_steps_per_second": 119.021,
+      "step": 107470
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 107470,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.656124322596352e+16,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f09a088fc659036909375538a041709b83ecfa3173e60c7388d9eccad28cbfbe
+size 5176

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff