Upload folder using huggingface_hub

Browse files

Files changed (11) hide show

config.json +135 -0
model.safetensors +3 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +59 -0
trainer_state.json +1498 -0
training_args.bin +3 -0
vocab.txt +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,135 @@

+{
+  "architectures": [
+    "BertForTokenClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "O",
+    "1": "B-IP_ADDRESS",
+    "2": "I-IP_ADDRESS",
+    "3": "B-SERVER_NAME",
+    "4": "I-SERVER_NAME",
+    "5": "B-DOMAIN",
+    "6": "I-DOMAIN",
+    "7": "B-PERSON",
+    "8": "I-PERSON",
+    "9": "B-PHONE_NUMBER",
+    "10": "I-PHONE_NUMBER",
+    "11": "B-EMAIL",
+    "12": "I-EMAIL",
+    "13": "B-IBAN_ACCOUNT",
+    "14": "I-IBAN_ACCOUNT",
+    "15": "B-BANK_ACCOUNT",
+    "16": "I-BANK_ACCOUNT",
+    "17": "B-BANK_NAME",
+    "18": "I-BANK_NAME",
+    "19": "B-AMOUNT",
+    "20": "I-AMOUNT",
+    "21": "B-CREDIT_CARD",
+    "22": "I-CREDIT_CARD",
+    "23": "B-GEO_UNIT_NUMBER",
+    "24": "I-GEO_UNIT_NUMBER",
+    "25": "B-BANK_BRANCH_NUMBER",
+    "26": "I-BANK_BRANCH_NUMBER",
+    "27": "B-STREET_ADDRESS",
+    "28": "I-STREET_ADDRESS",
+    "29": "B-CITY",
+    "30": "I-CITY",
+    "31": "B-PROVINCE_STATE",
+    "32": "I-PROVINCE_STATE",
+    "33": "B-ISRAEL_ID",
+    "34": "I-ISRAEL_ID",
+    "35": "B-PASSPORT",
+    "36": "I-PASSPORT",
+    "37": "B-COMPANY_NUMBER",
+    "38": "I-COMPANY_NUMBER",
+    "39": "B-COMPANY_NAME",
+    "40": "I-COMPANY_NAME",
+    "41": "B-JWT",
+    "42": "I-JWT",
+    "43": "B-USERNAME",
+    "44": "I-USERNAME",
+    "45": "B-PASSWORD",
+    "46": "I-PASSWORD",
+    "47": "B-SECRET",
+    "48": "I-SECRET",
+    "49": "B-API_KEY",
+    "50": "I-API_KEY",
+    "51": "B-CERTIFICATE",
+    "52": "I-CERTIFICATE"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "B-AMOUNT": 19,
+    "B-API_KEY": 49,
+    "B-BANK_ACCOUNT": 15,
+    "B-BANK_BRANCH_NUMBER": 25,
+    "B-BANK_NAME": 17,
+    "B-CERTIFICATE": 51,
+    "B-CITY": 29,
+    "B-COMPANY_NAME": 39,
+    "B-COMPANY_NUMBER": 37,
+    "B-CREDIT_CARD": 21,
+    "B-DOMAIN": 5,
+    "B-EMAIL": 11,
+    "B-GEO_UNIT_NUMBER": 23,
+    "B-IBAN_ACCOUNT": 13,
+    "B-IP_ADDRESS": 1,
+    "B-ISRAEL_ID": 33,
+    "B-JWT": 41,
+    "B-PASSPORT": 35,
+    "B-PASSWORD": 45,
+    "B-PERSON": 7,
+    "B-PHONE_NUMBER": 9,
+    "B-PROVINCE_STATE": 31,
+    "B-SECRET": 47,
+    "B-SERVER_NAME": 3,
+    "B-STREET_ADDRESS": 27,
+    "B-USERNAME": 43,
+    "I-AMOUNT": 20,
+    "I-API_KEY": 50,
+    "I-BANK_ACCOUNT": 16,
+    "I-BANK_BRANCH_NUMBER": 26,
+    "I-BANK_NAME": 18,
+    "I-CERTIFICATE": 52,
+    "I-CITY": 30,
+    "I-COMPANY_NAME": 40,
+    "I-COMPANY_NUMBER": 38,
+    "I-CREDIT_CARD": 22,
+    "I-DOMAIN": 6,
+    "I-EMAIL": 12,
+    "I-GEO_UNIT_NUMBER": 24,
+    "I-IBAN_ACCOUNT": 14,
+    "I-IP_ADDRESS": 2,
+    "I-ISRAEL_ID": 34,
+    "I-JWT": 42,
+    "I-PASSPORT": 36,
+    "I-PASSWORD": 46,
+    "I-PERSON": 8,
+    "I-PHONE_NUMBER": 10,
+    "I-PROVINCE_STATE": 32,
+    "I-SECRET": 48,
+    "I-SERVER_NAME": 4,
+    "I-STREET_ADDRESS": 28,
+    "I-USERNAME": 44,
+    "O": 0
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.54.1",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 52000
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:12d079162172f13d0fe7f6f2249c2c23bd1475023642fba552eb9bdef7ef9b27
+size 501730324

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:60fef75e6f7fe835f7b68b636887c38793bd33989a1025bf4ce08876c514f1c6
+size 1003580346

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31468f08e7a9f95f89f9d0b4ad33a8298bf66b13e3b505fc5c1c46943b2cd3f1
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c202dacfc4d7001609152c6fc324bee0b3cf258928bb66e63d80186a00991fb0
+size 1064

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "max_len": 512,
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1498 @@

+{
+  "best_global_step": 20475,
+  "best_metric": 1.0,
+  "best_model_checkpoint": "./ner_model_logs/checkpoint-20475",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 20475,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.014652014652014652,
+      "grad_norm": 2.3198535442352295,
+      "learning_rate": 1.9903296703296703e-05,
+      "loss": 0.721,
+      "step": 100
+    },
+    {
+      "epoch": 0.029304029304029304,
+      "grad_norm": 0.227557972073555,
+      "learning_rate": 1.9805616605616607e-05,
+      "loss": 0.0878,
+      "step": 200
+    },
+    {
+      "epoch": 0.04395604395604396,
+      "grad_norm": 3.8474526405334473,
+      "learning_rate": 1.970793650793651e-05,
+      "loss": 0.0133,
+      "step": 300
+    },
+    {
+      "epoch": 0.05860805860805861,
+      "grad_norm": 0.06616735458374023,
+      "learning_rate": 1.961025641025641e-05,
+      "loss": 0.0051,
+      "step": 400
+    },
+    {
+      "epoch": 0.07326007326007326,
+      "grad_norm": 0.02466263622045517,
+      "learning_rate": 1.9512576312576314e-05,
+      "loss": 0.0046,
+      "step": 500
+    },
+    {
+      "epoch": 0.08791208791208792,
+      "grad_norm": 0.3532762825489044,
+      "learning_rate": 1.9414896214896218e-05,
+      "loss": 0.0033,
+      "step": 600
+    },
+    {
+      "epoch": 0.10256410256410256,
+      "grad_norm": 1.2159192562103271,
+      "learning_rate": 1.931721611721612e-05,
+      "loss": 0.0023,
+      "step": 700
+    },
+    {
+      "epoch": 0.11721611721611722,
+      "grad_norm": 0.07953804731369019,
+      "learning_rate": 1.9219536019536022e-05,
+      "loss": 0.0036,
+      "step": 800
+    },
+    {
+      "epoch": 0.13186813186813187,
+      "grad_norm": 0.022895004600286484,
+      "learning_rate": 1.9121855921855922e-05,
+      "loss": 0.0018,
+      "step": 900
+    },
+    {
+      "epoch": 0.14652014652014653,
+      "grad_norm": 0.0714658722281456,
+      "learning_rate": 1.9024175824175826e-05,
+      "loss": 0.0013,
+      "step": 1000
+    },
+    {
+      "epoch": 0.16117216117216118,
+      "grad_norm": 0.07990960776805878,
+      "learning_rate": 1.892649572649573e-05,
+      "loss": 0.0011,
+      "step": 1100
+    },
+    {
+      "epoch": 0.17582417582417584,
+      "grad_norm": 0.005112498998641968,
+      "learning_rate": 1.882881562881563e-05,
+      "loss": 0.0013,
+      "step": 1200
+    },
+    {
+      "epoch": 0.19047619047619047,
+      "grad_norm": 0.004775651730597019,
+      "learning_rate": 1.8731135531135533e-05,
+      "loss": 0.0006,
+      "step": 1300
+    },
+    {
+      "epoch": 0.20512820512820512,
+      "grad_norm": 0.003744883695617318,
+      "learning_rate": 1.8633455433455434e-05,
+      "loss": 0.0004,
+      "step": 1400
+    },
+    {
+      "epoch": 0.21978021978021978,
+      "grad_norm": 0.16701260209083557,
+      "learning_rate": 1.8535775335775337e-05,
+      "loss": 0.0007,
+      "step": 1500
+    },
+    {
+      "epoch": 0.23443223443223443,
+      "grad_norm": 0.012116856873035431,
+      "learning_rate": 1.843809523809524e-05,
+      "loss": 0.0019,
+      "step": 1600
+    },
+    {
+      "epoch": 0.2490842490842491,
+      "grad_norm": 0.03328242152929306,
+      "learning_rate": 1.834041514041514e-05,
+      "loss": 0.0006,
+      "step": 1700
+    },
+    {
+      "epoch": 0.26373626373626374,
+      "grad_norm": 0.48128026723861694,
+      "learning_rate": 1.8242735042735045e-05,
+      "loss": 0.0022,
+      "step": 1800
+    },
+    {
+      "epoch": 0.2783882783882784,
+      "grad_norm": 0.008382058702409267,
+      "learning_rate": 1.814505494505495e-05,
+      "loss": 0.001,
+      "step": 1900
+    },
+    {
+      "epoch": 0.29304029304029305,
+      "grad_norm": 0.0023408152628690004,
+      "learning_rate": 1.804737484737485e-05,
+      "loss": 0.0009,
+      "step": 2000
+    },
+    {
+      "epoch": 0.3076923076923077,
+      "grad_norm": 0.003858278738334775,
+      "learning_rate": 1.794969474969475e-05,
+      "loss": 0.0075,
+      "step": 2100
+    },
+    {
+      "epoch": 0.32234432234432236,
+      "grad_norm": 0.005588823929429054,
+      "learning_rate": 1.7852014652014653e-05,
+      "loss": 0.0025,
+      "step": 2200
+    },
+    {
+      "epoch": 0.336996336996337,
+      "grad_norm": 0.002319753635674715,
+      "learning_rate": 1.7754334554334556e-05,
+      "loss": 0.0004,
+      "step": 2300
+    },
+    {
+      "epoch": 0.3516483516483517,
+      "grad_norm": 0.002465345896780491,
+      "learning_rate": 1.765665445665446e-05,
+      "loss": 0.001,
+      "step": 2400
+    },
+    {
+      "epoch": 0.3663003663003663,
+      "grad_norm": 0.014657393097877502,
+      "learning_rate": 1.755897435897436e-05,
+      "loss": 0.0009,
+      "step": 2500
+    },
+    {
+      "epoch": 0.38095238095238093,
+      "grad_norm": 0.0048686908558011055,
+      "learning_rate": 1.7461294261294264e-05,
+      "loss": 0.0013,
+      "step": 2600
+    },
+    {
+      "epoch": 0.3956043956043956,
+      "grad_norm": 0.0015726798446848989,
+      "learning_rate": 1.7363614163614164e-05,
+      "loss": 0.0004,
+      "step": 2700
+    },
+    {
+      "epoch": 0.41025641025641024,
+      "grad_norm": 0.002173542743548751,
+      "learning_rate": 1.7265934065934068e-05,
+      "loss": 0.0002,
+      "step": 2800
+    },
+    {
+      "epoch": 0.4249084249084249,
+      "grad_norm": 0.0014299682807177305,
+      "learning_rate": 1.7168253968253968e-05,
+      "loss": 0.0001,
+      "step": 2900
+    },
+    {
+      "epoch": 0.43956043956043955,
+      "grad_norm": 0.001351997023448348,
+      "learning_rate": 1.707057387057387e-05,
+      "loss": 0.0003,
+      "step": 3000
+    },
+    {
+      "epoch": 0.4542124542124542,
+      "grad_norm": 0.04424109309911728,
+      "learning_rate": 1.6972893772893775e-05,
+      "loss": 0.0004,
+      "step": 3100
+    },
+    {
+      "epoch": 0.46886446886446886,
+      "grad_norm": 0.0023619807325303555,
+      "learning_rate": 1.687521367521368e-05,
+      "loss": 0.0018,
+      "step": 3200
+    },
+    {
+      "epoch": 0.4835164835164835,
+      "grad_norm": 0.0014780177734792233,
+      "learning_rate": 1.677753357753358e-05,
+      "loss": 0.0003,
+      "step": 3300
+    },
+    {
+      "epoch": 0.4981684981684982,
+      "grad_norm": 0.0007553008617833257,
+      "learning_rate": 1.667985347985348e-05,
+      "loss": 0.0001,
+      "step": 3400
+    },
+    {
+      "epoch": 0.5128205128205128,
+      "grad_norm": 0.0011919466778635979,
+      "learning_rate": 1.6582173382173383e-05,
+      "loss": 0.0005,
+      "step": 3500
+    },
+    {
+      "epoch": 0.5274725274725275,
+      "grad_norm": 0.0008271373226307333,
+      "learning_rate": 1.6484493284493287e-05,
+      "loss": 0.0005,
+      "step": 3600
+    },
+    {
+      "epoch": 0.5421245421245421,
+      "grad_norm": 0.0020716034341603518,
+      "learning_rate": 1.6386813186813187e-05,
+      "loss": 0.0001,
+      "step": 3700
+    },
+    {
+      "epoch": 0.5567765567765568,
+      "grad_norm": 0.0011554955272004008,
+      "learning_rate": 1.628913308913309e-05,
+      "loss": 0.0007,
+      "step": 3800
+    },
+    {
+      "epoch": 0.5714285714285714,
+      "grad_norm": 0.0009056870476342738,
+      "learning_rate": 1.6191452991452994e-05,
+      "loss": 0.0001,
+      "step": 3900
+    },
+    {
+      "epoch": 0.5860805860805861,
+      "grad_norm": 0.00144649064168334,
+      "learning_rate": 1.6093772893772894e-05,
+      "loss": 0.0002,
+      "step": 4000
+    },
+    {
+      "epoch": 0.6007326007326007,
+      "grad_norm": 0.0012368283933028579,
+      "learning_rate": 1.5996092796092798e-05,
+      "loss": 0.0012,
+      "step": 4100
+    },
+    {
+      "epoch": 0.6153846153846154,
+      "grad_norm": 0.032919418066740036,
+      "learning_rate": 1.58984126984127e-05,
+      "loss": 0.0013,
+      "step": 4200
+    },
+    {
+      "epoch": 0.63003663003663,
+      "grad_norm": 0.0019616656936705112,
+      "learning_rate": 1.5800732600732602e-05,
+      "loss": 0.0013,
+      "step": 4300
+    },
+    {
+      "epoch": 0.6446886446886447,
+      "grad_norm": 0.006232458166778088,
+      "learning_rate": 1.5703052503052506e-05,
+      "loss": 0.0007,
+      "step": 4400
+    },
+    {
+      "epoch": 0.6593406593406593,
+      "grad_norm": 0.00093706080224365,
+      "learning_rate": 1.5605372405372406e-05,
+      "loss": 0.0005,
+      "step": 4500
+    },
+    {
+      "epoch": 0.673992673992674,
+      "grad_norm": 0.001097803469747305,
+      "learning_rate": 1.550769230769231e-05,
+      "loss": 0.0001,
+      "step": 4600
+    },
+    {
+      "epoch": 0.6886446886446886,
+      "grad_norm": 0.0011469683377072215,
+      "learning_rate": 1.541001221001221e-05,
+      "loss": 0.0001,
+      "step": 4700
+    },
+    {
+      "epoch": 0.7032967032967034,
+      "grad_norm": 0.001127622788771987,
+      "learning_rate": 1.5312332112332114e-05,
+      "loss": 0.0006,
+      "step": 4800
+    },
+    {
+      "epoch": 0.717948717948718,
+      "grad_norm": 0.0017739522736519575,
+      "learning_rate": 1.5214652014652015e-05,
+      "loss": 0.0006,
+      "step": 4900
+    },
+    {
+      "epoch": 0.7326007326007326,
+      "grad_norm": 0.00775282084941864,
+      "learning_rate": 1.5116971916971919e-05,
+      "loss": 0.0001,
+      "step": 5000
+    },
+    {
+      "epoch": 0.7472527472527473,
+      "grad_norm": 0.0007407303201034665,
+      "learning_rate": 1.5019291819291821e-05,
+      "loss": 0.0004,
+      "step": 5100
+    },
+    {
+      "epoch": 0.7619047619047619,
+      "grad_norm": 0.0013244397705420852,
+      "learning_rate": 1.4921611721611721e-05,
+      "loss": 0.0002,
+      "step": 5200
+    },
+    {
+      "epoch": 0.7765567765567766,
+      "grad_norm": 0.0006274768384173512,
+      "learning_rate": 1.4823931623931625e-05,
+      "loss": 0.0001,
+      "step": 5300
+    },
+    {
+      "epoch": 0.7912087912087912,
+      "grad_norm": 0.001210793387144804,
+      "learning_rate": 1.4726251526251527e-05,
+      "loss": 0.0003,
+      "step": 5400
+    },
+    {
+      "epoch": 0.8058608058608059,
+      "grad_norm": 0.002480907831341028,
+      "learning_rate": 1.462857142857143e-05,
+      "loss": 0.0005,
+      "step": 5500
+    },
+    {
+      "epoch": 0.8205128205128205,
+      "grad_norm": 0.0005772067815996706,
+      "learning_rate": 1.4530891330891333e-05,
+      "loss": 0.0008,
+      "step": 5600
+    },
+    {
+      "epoch": 0.8351648351648352,
+      "grad_norm": 0.002250727266073227,
+      "learning_rate": 1.4433211233211234e-05,
+      "loss": 0.0023,
+      "step": 5700
+    },
+    {
+      "epoch": 0.8498168498168498,
+      "grad_norm": 0.001360761933028698,
+      "learning_rate": 1.4335531135531138e-05,
+      "loss": 0.0001,
+      "step": 5800
+    },
+    {
+      "epoch": 0.8644688644688645,
+      "grad_norm": 0.004648945759981871,
+      "learning_rate": 1.4237851037851038e-05,
+      "loss": 0.0001,
+      "step": 5900
+    },
+    {
+      "epoch": 0.8791208791208791,
+      "grad_norm": 0.0005541480495594442,
+      "learning_rate": 1.414017094017094e-05,
+      "loss": 0.0001,
+      "step": 6000
+    },
+    {
+      "epoch": 0.8937728937728938,
+      "grad_norm": 0.0015395252266898751,
+      "learning_rate": 1.4042490842490844e-05,
+      "loss": 0.0003,
+      "step": 6100
+    },
+    {
+      "epoch": 0.9084249084249084,
+      "grad_norm": 0.0007218205719254911,
+      "learning_rate": 1.3944810744810746e-05,
+      "loss": 0.0001,
+      "step": 6200
+    },
+    {
+      "epoch": 0.9230769230769231,
+      "grad_norm": 0.0005430618184618652,
+      "learning_rate": 1.384713064713065e-05,
+      "loss": 0.0,
+      "step": 6300
+    },
+    {
+      "epoch": 0.9377289377289377,
+      "grad_norm": 0.0004149100568611175,
+      "learning_rate": 1.3749450549450552e-05,
+      "loss": 0.0,
+      "step": 6400
+    },
+    {
+      "epoch": 0.9523809523809523,
+      "grad_norm": 0.0004269062774255872,
+      "learning_rate": 1.3651770451770452e-05,
+      "loss": 0.0,
+      "step": 6500
+    },
+    {
+      "epoch": 0.967032967032967,
+      "grad_norm": 0.0002754240995272994,
+      "learning_rate": 1.3554090354090354e-05,
+      "loss": 0.0001,
+      "step": 6600
+    },
+    {
+      "epoch": 0.9816849816849816,
+      "grad_norm": 0.0005386194679886103,
+      "learning_rate": 1.3456410256410257e-05,
+      "loss": 0.001,
+      "step": 6700
+    },
+    {
+      "epoch": 0.9963369963369964,
+      "grad_norm": 0.0007499135099351406,
+      "learning_rate": 1.335873015873016e-05,
+      "loss": 0.0006,
+      "step": 6800
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.9999860922583947,
+      "eval_f1": 0.9996811563396747,
+      "eval_loss": 0.0001409229007549584,
+      "eval_precision": 0.9996356238423466,
+      "eval_recall": 0.99972669298512,
+      "eval_runtime": 299.7618,
+      "eval_samples_per_second": 91.072,
+      "eval_steps_per_second": 5.695,
+      "step": 6825
+    },
+    {
+      "epoch": 1.010989010989011,
+      "grad_norm": 0.00030950666405260563,
+      "learning_rate": 1.3261050061050063e-05,
+      "loss": 0.0002,
+      "step": 6900
+    },
+    {
+      "epoch": 1.0256410256410255,
+      "grad_norm": 0.0019652473274618387,
+      "learning_rate": 1.3163369963369965e-05,
+      "loss": 0.0012,
+      "step": 7000
+    },
+    {
+      "epoch": 1.0402930402930404,
+      "grad_norm": 0.00197768397629261,
+      "learning_rate": 1.3065689865689869e-05,
+      "loss": 0.0009,
+      "step": 7100
+    },
+    {
+      "epoch": 1.054945054945055,
+      "grad_norm": 0.0035088348668068647,
+      "learning_rate": 1.2968009768009769e-05,
+      "loss": 0.0004,
+      "step": 7200
+    },
+    {
+      "epoch": 1.0695970695970696,
+      "grad_norm": 0.0010668466566130519,
+      "learning_rate": 1.287032967032967e-05,
+      "loss": 0.0001,
+      "step": 7300
+    },
+    {
+      "epoch": 1.0842490842490842,
+      "grad_norm": 0.0011741893831640482,
+      "learning_rate": 1.2772649572649573e-05,
+      "loss": 0.0001,
+      "step": 7400
+    },
+    {
+      "epoch": 1.098901098901099,
+      "grad_norm": 0.0011687855003401637,
+      "learning_rate": 1.2674969474969476e-05,
+      "loss": 0.0002,
+      "step": 7500
+    },
+    {
+      "epoch": 1.1135531135531136,
+      "grad_norm": 0.0013387134531512856,
+      "learning_rate": 1.2577289377289378e-05,
+      "loss": 0.0009,
+      "step": 7600
+    },
+    {
+      "epoch": 1.1282051282051282,
+      "grad_norm": 0.004518842324614525,
+      "learning_rate": 1.2479609279609282e-05,
+      "loss": 0.0,
+      "step": 7700
+    },
+    {
+      "epoch": 1.1428571428571428,
+      "grad_norm": 0.007567571010440588,
+      "learning_rate": 1.2381929181929182e-05,
+      "loss": 0.0001,
+      "step": 7800
+    },
+    {
+      "epoch": 1.1575091575091574,
+      "grad_norm": 0.000706327729858458,
+      "learning_rate": 1.2284249084249084e-05,
+      "loss": 0.0002,
+      "step": 7900
+    },
+    {
+      "epoch": 1.1721611721611722,
+      "grad_norm": 0.0004430621338542551,
+      "learning_rate": 1.2186568986568988e-05,
+      "loss": 0.0004,
+      "step": 8000
+    },
+    {
+      "epoch": 1.1868131868131868,
+      "grad_norm": 0.003274950897321105,
+      "learning_rate": 1.208888888888889e-05,
+      "loss": 0.0,
+      "step": 8100
+    },
+    {
+      "epoch": 1.2014652014652014,
+      "grad_norm": 0.009472182020545006,
+      "learning_rate": 1.1991208791208793e-05,
+      "loss": 0.0,
+      "step": 8200
+    },
+    {
+      "epoch": 1.2161172161172162,
+      "grad_norm": 0.000377336866222322,
+      "learning_rate": 1.1893528693528695e-05,
+      "loss": 0.0001,
+      "step": 8300
+    },
+    {
+      "epoch": 1.2307692307692308,
+      "grad_norm": 0.00038757469155825675,
+      "learning_rate": 1.1795848595848596e-05,
+      "loss": 0.0002,
+      "step": 8400
+    },
+    {
+      "epoch": 1.2454212454212454,
+      "grad_norm": 0.000478828267659992,
+      "learning_rate": 1.1698168498168498e-05,
+      "loss": 0.0,
+      "step": 8500
+    },
+    {
+      "epoch": 1.26007326007326,
+      "grad_norm": 0.0003117730957455933,
+      "learning_rate": 1.1600488400488401e-05,
+      "loss": 0.0002,
+      "step": 8600
+    },
+    {
+      "epoch": 1.2747252747252746,
+      "grad_norm": 0.0005039616953581572,
+      "learning_rate": 1.1502808302808303e-05,
+      "loss": 0.0001,
+      "step": 8700
+    },
+    {
+      "epoch": 1.2893772893772895,
+      "grad_norm": 0.00023192950175143778,
+      "learning_rate": 1.1405128205128207e-05,
+      "loss": 0.0,
+      "step": 8800
+    },
+    {
+      "epoch": 1.304029304029304,
+      "grad_norm": 0.0033253557048738003,
+      "learning_rate": 1.1307448107448109e-05,
+      "loss": 0.0004,
+      "step": 8900
+    },
+    {
+      "epoch": 1.3186813186813187,
+      "grad_norm": 0.00022548828565049917,
+      "learning_rate": 1.1209768009768013e-05,
+      "loss": 0.0,
+      "step": 9000
+    },
+    {
+      "epoch": 1.3333333333333333,
+      "grad_norm": 0.00022915085719432682,
+      "learning_rate": 1.1112087912087913e-05,
+      "loss": 0.0,
+      "step": 9100
+    },
+    {
+      "epoch": 1.347985347985348,
+      "grad_norm": 0.00038897068588994443,
+      "learning_rate": 1.1014407814407815e-05,
+      "loss": 0.0003,
+      "step": 9200
+    },
+    {
+      "epoch": 1.3626373626373627,
+      "grad_norm": 0.00022964017989579588,
+      "learning_rate": 1.0916727716727717e-05,
+      "loss": 0.0,
+      "step": 9300
+    },
+    {
+      "epoch": 1.3772893772893773,
+      "grad_norm": 0.00019336077093612403,
+      "learning_rate": 1.081904761904762e-05,
+      "loss": 0.0,
+      "step": 9400
+    },
+    {
+      "epoch": 1.3919413919413919,
+      "grad_norm": 0.00016649049939587712,
+      "learning_rate": 1.0721367521367522e-05,
+      "loss": 0.0,
+      "step": 9500
+    },
+    {
+      "epoch": 1.4065934065934065,
+      "grad_norm": 0.0029115676879882812,
+      "learning_rate": 1.0623687423687426e-05,
+      "loss": 0.0,
+      "step": 9600
+    },
+    {
+      "epoch": 1.4212454212454213,
+      "grad_norm": 0.00016056567255873233,
+      "learning_rate": 1.0526007326007326e-05,
+      "loss": 0.0,
+      "step": 9700
+    },
+    {
+      "epoch": 1.435897435897436,
+      "grad_norm": 0.0007812806870788336,
+      "learning_rate": 1.0428327228327228e-05,
+      "loss": 0.0015,
+      "step": 9800
+    },
+    {
+      "epoch": 1.4505494505494505,
+      "grad_norm": 0.0008388872374780476,
+      "learning_rate": 1.0330647130647132e-05,
+      "loss": 0.0003,
+      "step": 9900
+    },
+    {
+      "epoch": 1.4652014652014653,
+      "grad_norm": 0.0003454253019299358,
+      "learning_rate": 1.0232967032967034e-05,
+      "loss": 0.0001,
+      "step": 10000
+    },
+    {
+      "epoch": 1.47985347985348,
+      "grad_norm": 0.00045976179535500705,
+      "learning_rate": 1.0135286935286936e-05,
+      "loss": 0.0,
+      "step": 10100
+    },
+    {
+      "epoch": 1.4945054945054945,
+      "grad_norm": 0.00019197350775357336,
+      "learning_rate": 1.003760683760684e-05,
+      "loss": 0.0,
+      "step": 10200
+    },
+    {
+      "epoch": 1.5091575091575091,
+      "grad_norm": 0.00016801034507807344,
+      "learning_rate": 9.939926739926741e-06,
+      "loss": 0.0001,
+      "step": 10300
+    },
+    {
+      "epoch": 1.5238095238095237,
+      "grad_norm": 1.131913423538208,
+      "learning_rate": 9.842246642246643e-06,
+      "loss": 0.0001,
+      "step": 10400
+    },
+    {
+      "epoch": 1.5384615384615383,
+      "grad_norm": 0.0002565563772805035,
+      "learning_rate": 9.744566544566545e-06,
+      "loss": 0.0,
+      "step": 10500
+    },
+    {
+      "epoch": 1.5531135531135531,
+      "grad_norm": 0.00042528833728283644,
+      "learning_rate": 9.646886446886447e-06,
+      "loss": 0.0002,
+      "step": 10600
+    },
+    {
+      "epoch": 1.5677655677655677,
+      "grad_norm": 0.012502134777605534,
+      "learning_rate": 9.54920634920635e-06,
+      "loss": 0.0003,
+      "step": 10700
+    },
+    {
+      "epoch": 1.5824175824175826,
+      "grad_norm": 0.0005918039241805673,
+      "learning_rate": 9.451526251526251e-06,
+      "loss": 0.0,
+      "step": 10800
+    },
+    {
+      "epoch": 1.5970695970695972,
+      "grad_norm": 0.0017294063000008464,
+      "learning_rate": 9.353846153846155e-06,
+      "loss": 0.0,
+      "step": 10900
+    },
+    {
+      "epoch": 1.6117216117216118,
+      "grad_norm": 0.00014640063454862684,
+      "learning_rate": 9.256166056166057e-06,
+      "loss": 0.0,
+      "step": 11000
+    },
+    {
+      "epoch": 1.6263736263736264,
+      "grad_norm": 0.0004110069421585649,
+      "learning_rate": 9.15848595848596e-06,
+      "loss": 0.0,
+      "step": 11100
+    },
+    {
+      "epoch": 1.641025641025641,
+      "grad_norm": 0.00027677713660523295,
+      "learning_rate": 9.06080586080586e-06,
+      "loss": 0.0001,
+      "step": 11200
+    },
+    {
+      "epoch": 1.6556776556776556,
+      "grad_norm": 0.0006185442907735705,
+      "learning_rate": 8.963125763125764e-06,
+      "loss": 0.0,
+      "step": 11300
+    },
+    {
+      "epoch": 1.6703296703296702,
+      "grad_norm": 0.00041590031469240785,
+      "learning_rate": 8.865445665445666e-06,
+      "loss": 0.0,
+      "step": 11400
+    },
+    {
+      "epoch": 1.684981684981685,
+      "grad_norm": 0.00022950036509428173,
+      "learning_rate": 8.767765567765568e-06,
+      "loss": 0.0,
+      "step": 11500
+    },
+    {
+      "epoch": 1.6996336996336996,
+      "grad_norm": 0.00020648095232900232,
+      "learning_rate": 8.67008547008547e-06,
+      "loss": 0.0,
+      "step": 11600
+    },
+    {
+      "epoch": 1.7142857142857144,
+      "grad_norm": 8.724381041247398e-05,
+      "learning_rate": 8.572405372405374e-06,
+      "loss": 0.0001,
+      "step": 11700
+    },
+    {
+      "epoch": 1.728937728937729,
+      "grad_norm": 0.0001912677544169128,
+      "learning_rate": 8.474725274725276e-06,
+      "loss": 0.0,
+      "step": 11800
+    },
+    {
+      "epoch": 1.7435897435897436,
+      "grad_norm": 0.00011383296805433929,
+      "learning_rate": 8.377045177045178e-06,
+      "loss": 0.0,
+      "step": 11900
+    },
+    {
+      "epoch": 1.7582417582417582,
+      "grad_norm": 0.00030712466104887426,
+      "learning_rate": 8.27936507936508e-06,
+      "loss": 0.0002,
+      "step": 12000
+    },
+    {
+      "epoch": 1.7728937728937728,
+      "grad_norm": 0.00013500441855285317,
+      "learning_rate": 8.181684981684982e-06,
+      "loss": 0.0,
+      "step": 12100
+    },
+    {
+      "epoch": 1.7875457875457874,
+      "grad_norm": 0.00015925474872346967,
+      "learning_rate": 8.084004884004885e-06,
+      "loss": 0.0007,
+      "step": 12200
+    },
+    {
+      "epoch": 1.8021978021978022,
+      "grad_norm": 0.0006131925620138645,
+      "learning_rate": 7.986324786324787e-06,
+      "loss": 0.0,
+      "step": 12300
+    },
+    {
+      "epoch": 1.8168498168498168,
+      "grad_norm": 0.001735904486849904,
+      "learning_rate": 7.888644688644689e-06,
+      "loss": 0.0002,
+      "step": 12400
+    },
+    {
+      "epoch": 1.8315018315018317,
+      "grad_norm": 0.0001556806091684848,
+      "learning_rate": 7.790964590964591e-06,
+      "loss": 0.0,
+      "step": 12500
+    },
+    {
+      "epoch": 1.8461538461538463,
+      "grad_norm": 1.0768227577209473,
+      "learning_rate": 7.693284493284495e-06,
+      "loss": 0.0003,
+      "step": 12600
+    },
+    {
+      "epoch": 1.8608058608058609,
+      "grad_norm": 0.00020022221724502742,
+      "learning_rate": 7.595604395604397e-06,
+      "loss": 0.0001,
+      "step": 12700
+    },
+    {
+      "epoch": 1.8754578754578755,
+      "grad_norm": 0.00016349930956494063,
+      "learning_rate": 7.497924297924299e-06,
+      "loss": 0.0,
+      "step": 12800
+    },
+    {
+      "epoch": 1.89010989010989,
+      "grad_norm": 0.0001262535952264443,
+      "learning_rate": 7.400244200244201e-06,
+      "loss": 0.0,
+      "step": 12900
+    },
+    {
+      "epoch": 1.9047619047619047,
+      "grad_norm": 0.003379235276952386,
+      "learning_rate": 7.302564102564103e-06,
+      "loss": 0.0003,
+      "step": 13000
+    },
+    {
+      "epoch": 1.9194139194139193,
+      "grad_norm": 0.00012544514902401716,
+      "learning_rate": 7.204884004884005e-06,
+      "loss": 0.0,
+      "step": 13100
+    },
+    {
+      "epoch": 1.934065934065934,
+      "grad_norm": 0.0004016385355498642,
+      "learning_rate": 7.107203907203908e-06,
+      "loss": 0.0,
+      "step": 13200
+    },
+    {
+      "epoch": 1.9487179487179487,
+      "grad_norm": 0.0003825027379207313,
+      "learning_rate": 7.00952380952381e-06,
+      "loss": 0.0003,
+      "step": 13300
+    },
+    {
+      "epoch": 1.9633699633699635,
+      "grad_norm": 0.00017085819854401052,
+      "learning_rate": 6.911843711843712e-06,
+      "loss": 0.0,
+      "step": 13400
+    },
+    {
+      "epoch": 1.978021978021978,
+      "grad_norm": 0.0001403139322064817,
+      "learning_rate": 6.814163614163615e-06,
+      "loss": 0.0,
+      "step": 13500
+    },
+    {
+      "epoch": 1.9926739926739927,
+      "grad_norm": 0.003122469875961542,
+      "learning_rate": 6.716483516483518e-06,
+      "loss": 0.0,
+      "step": 13600
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.9999990728172263,
+      "eval_f1": 0.9999544495224792,
+      "eval_loss": 1.237483047589194e-05,
+      "eval_precision": 0.999939266952112,
+      "eval_recall": 0.9999696325539023,
+      "eval_runtime": 267.2853,
+      "eval_samples_per_second": 102.138,
+      "eval_steps_per_second": 6.386,
+      "step": 13650
+    },
+    {
+      "epoch": 2.0073260073260073,
+      "grad_norm": 6.217395275598392e-05,
+      "learning_rate": 6.618803418803419e-06,
+      "loss": 0.0,
+      "step": 13700
+    },
+    {
+      "epoch": 2.021978021978022,
+      "grad_norm": 0.0001487318950239569,
+      "learning_rate": 6.5211233211233216e-06,
+      "loss": 0.0003,
+      "step": 13800
+    },
+    {
+      "epoch": 2.0366300366300365,
+      "grad_norm": 0.00045171970850788057,
+      "learning_rate": 6.423443223443224e-06,
+      "loss": 0.0001,
+      "step": 13900
+    },
+    {
+      "epoch": 2.051282051282051,
+      "grad_norm": 0.00022699052351526916,
+      "learning_rate": 6.3257631257631255e-06,
+      "loss": 0.0,
+      "step": 14000
+    },
+    {
+      "epoch": 2.065934065934066,
+      "grad_norm": 9.758808300830424e-05,
+      "learning_rate": 6.228083028083028e-06,
+      "loss": 0.0,
+      "step": 14100
+    },
+    {
+      "epoch": 2.0805860805860807,
+      "grad_norm": 0.00048689660616219044,
+      "learning_rate": 6.130402930402931e-06,
+      "loss": 0.0,
+      "step": 14200
+    },
+    {
+      "epoch": 2.0952380952380953,
+      "grad_norm": 7.219218969112262e-05,
+      "learning_rate": 6.032722832722834e-06,
+      "loss": 0.0,
+      "step": 14300
+    },
+    {
+      "epoch": 2.10989010989011,
+      "grad_norm": 0.00010942752123810351,
+      "learning_rate": 5.935042735042735e-06,
+      "loss": 0.0002,
+      "step": 14400
+    },
+    {
+      "epoch": 2.1245421245421245,
+      "grad_norm": 0.00013398531882558018,
+      "learning_rate": 5.837362637362638e-06,
+      "loss": 0.0,
+      "step": 14500
+    },
+    {
+      "epoch": 2.139194139194139,
+      "grad_norm": 0.00013351649977266788,
+      "learning_rate": 5.739682539682541e-06,
+      "loss": 0.0,
+      "step": 14600
+    },
+    {
+      "epoch": 2.1538461538461537,
+      "grad_norm": 8.359822095371783e-05,
+      "learning_rate": 5.6420024420024425e-06,
+      "loss": 0.0,
+      "step": 14700
+    },
+    {
+      "epoch": 2.1684981684981683,
+      "grad_norm": 7.686048775212839e-05,
+      "learning_rate": 5.5443223443223445e-06,
+      "loss": 0.0,
+      "step": 14800
+    },
+    {
+      "epoch": 2.183150183150183,
+      "grad_norm": 0.00011812122829724103,
+      "learning_rate": 5.446642246642247e-06,
+      "loss": 0.0,
+      "step": 14900
+    },
+    {
+      "epoch": 2.197802197802198,
+      "grad_norm": 0.00044185685692355037,
+      "learning_rate": 5.348962148962149e-06,
+      "loss": 0.0001,
+      "step": 15000
+    },
+    {
+      "epoch": 2.2124542124542126,
+      "grad_norm": 0.00013721364666707814,
+      "learning_rate": 5.251282051282052e-06,
+      "loss": 0.0001,
+      "step": 15100
+    },
+    {
+      "epoch": 2.227106227106227,
+      "grad_norm": 0.00018953319522552192,
+      "learning_rate": 5.153601953601954e-06,
+      "loss": 0.0,
+      "step": 15200
+    },
+    {
+      "epoch": 2.241758241758242,
+      "grad_norm": 0.0003042246389668435,
+      "learning_rate": 5.055921855921856e-06,
+      "loss": 0.0,
+      "step": 15300
+    },
+    {
+      "epoch": 2.2564102564102564,
+      "grad_norm": 7.083150558173656e-05,
+      "learning_rate": 4.958241758241759e-06,
+      "loss": 0.0,
+      "step": 15400
+    },
+    {
+      "epoch": 2.271062271062271,
+      "grad_norm": 8.48570343805477e-05,
+      "learning_rate": 4.8605616605616616e-06,
+      "loss": 0.0,
+      "step": 15500
+    },
+    {
+      "epoch": 2.2857142857142856,
+      "grad_norm": 7.620136602781713e-05,
+      "learning_rate": 4.7628815628815635e-06,
+      "loss": 0.0,
+      "step": 15600
+    },
+    {
+      "epoch": 2.3003663003663,
+      "grad_norm": 6.34704774711281e-05,
+      "learning_rate": 4.6652014652014655e-06,
+      "loss": 0.0,
+      "step": 15700
+    },
+    {
+      "epoch": 2.315018315018315,
+      "grad_norm": 8.851837628753856e-05,
+      "learning_rate": 4.567521367521368e-06,
+      "loss": 0.0001,
+      "step": 15800
+    },
+    {
+      "epoch": 2.32967032967033,
+      "grad_norm": 0.0001343141047982499,
+      "learning_rate": 4.46984126984127e-06,
+      "loss": 0.0001,
+      "step": 15900
+    },
+    {
+      "epoch": 2.3443223443223444,
+      "grad_norm": 0.0001232538343174383,
+      "learning_rate": 4.372161172161172e-06,
+      "loss": 0.0,
+      "step": 16000
+    },
+    {
+      "epoch": 2.358974358974359,
+      "grad_norm": 5.023027551942505e-05,
+      "learning_rate": 4.274481074481075e-06,
+      "loss": 0.0,
+      "step": 16100
+    },
+    {
+      "epoch": 2.3736263736263736,
+      "grad_norm": 8.723604696569964e-05,
+      "learning_rate": 4.176800976800977e-06,
+      "loss": 0.0,
+      "step": 16200
+    },
+    {
+      "epoch": 2.3882783882783882,
+      "grad_norm": 5.652988329529762e-05,
+      "learning_rate": 4.07912087912088e-06,
+      "loss": 0.0,
+      "step": 16300
+    },
+    {
+      "epoch": 2.402930402930403,
+      "grad_norm": 6.024859976605512e-05,
+      "learning_rate": 3.981440781440782e-06,
+      "loss": 0.0,
+      "step": 16400
+    },
+    {
+      "epoch": 2.4175824175824174,
+      "grad_norm": 4.5512519136536866e-05,
+      "learning_rate": 3.883760683760684e-06,
+      "loss": 0.0,
+      "step": 16500
+    },
+    {
+      "epoch": 2.4322344322344325,
+      "grad_norm": 6.305933493422344e-05,
+      "learning_rate": 3.7860805860805864e-06,
+      "loss": 0.0,
+      "step": 16600
+    },
+    {
+      "epoch": 2.446886446886447,
+      "grad_norm": 0.00021723458485212177,
+      "learning_rate": 3.688400488400489e-06,
+      "loss": 0.0,
+      "step": 16700
+    },
+    {
+      "epoch": 2.4615384615384617,
+      "grad_norm": 0.00010556123743299395,
+      "learning_rate": 3.5907203907203908e-06,
+      "loss": 0.0,
+      "step": 16800
+    },
+    {
+      "epoch": 2.4761904761904763,
+      "grad_norm": 4.880682536168024e-05,
+      "learning_rate": 3.4930402930402936e-06,
+      "loss": 0.0,
+      "step": 16900
+    },
+    {
+      "epoch": 2.490842490842491,
+      "grad_norm": 4.9467933422420174e-05,
+      "learning_rate": 3.3953601953601955e-06,
+      "loss": 0.0,
+      "step": 17000
+    },
+    {
+      "epoch": 2.5054945054945055,
+      "grad_norm": 6.394098454620689e-05,
+      "learning_rate": 3.2976800976800983e-06,
+      "loss": 0.0,
+      "step": 17100
+    },
+    {
+      "epoch": 2.52014652014652,
+      "grad_norm": 0.0005223629996180534,
+      "learning_rate": 3.2000000000000003e-06,
+      "loss": 0.0,
+      "step": 17200
+    },
+    {
+      "epoch": 2.5347985347985347,
+      "grad_norm": 3.2207102776737884e-05,
+      "learning_rate": 3.1023199023199022e-06,
+      "loss": 0.0,
+      "step": 17300
+    },
+    {
+      "epoch": 2.5494505494505493,
+      "grad_norm": 6.0004018450854346e-05,
+      "learning_rate": 3.004639804639805e-06,
+      "loss": 0.0001,
+      "step": 17400
+    },
+    {
+      "epoch": 2.564102564102564,
+      "grad_norm": 3.9327616832451895e-05,
+      "learning_rate": 2.906959706959707e-06,
+      "loss": 0.0,
+      "step": 17500
+    },
+    {
+      "epoch": 2.578754578754579,
+      "grad_norm": 4.118070864933543e-05,
+      "learning_rate": 2.8092796092796094e-06,
+      "loss": 0.0,
+      "step": 17600
+    },
+    {
+      "epoch": 2.5934065934065935,
+      "grad_norm": 4.89596750412602e-05,
+      "learning_rate": 2.711599511599512e-06,
+      "loss": 0.0001,
+      "step": 17700
+    },
+    {
+      "epoch": 2.608058608058608,
+      "grad_norm": 5.990030331304297e-05,
+      "learning_rate": 2.613919413919414e-06,
+      "loss": 0.0,
+      "step": 17800
+    },
+    {
+      "epoch": 2.6227106227106227,
+      "grad_norm": 3.487396315904334e-05,
+      "learning_rate": 2.516239316239317e-06,
+      "loss": 0.0002,
+      "step": 17900
+    },
+    {
+      "epoch": 2.6373626373626373,
+      "grad_norm": 2.6920677555608563e-05,
+      "learning_rate": 2.418559218559219e-06,
+      "loss": 0.0001,
+      "step": 18000
+    },
+    {
+      "epoch": 2.652014652014652,
+      "grad_norm": 5.215009514358826e-05,
+      "learning_rate": 2.3208791208791213e-06,
+      "loss": 0.0,
+      "step": 18100
+    },
+    {
+      "epoch": 2.6666666666666665,
+      "grad_norm": 4.3928706872975454e-05,
+      "learning_rate": 2.223199023199023e-06,
+      "loss": 0.0,
+      "step": 18200
+    },
+    {
+      "epoch": 2.6813186813186816,
+      "grad_norm": 0.004979077726602554,
+      "learning_rate": 2.1255189255189256e-06,
+      "loss": 0.0,
+      "step": 18300
+    },
+    {
+      "epoch": 2.695970695970696,
+      "grad_norm": 4.079126665601507e-05,
+      "learning_rate": 2.027838827838828e-06,
+      "loss": 0.0,
+      "step": 18400
+    },
+    {
+      "epoch": 2.7106227106227108,
+      "grad_norm": 3.755389479920268e-05,
+      "learning_rate": 1.9301587301587303e-06,
+      "loss": 0.0,
+      "step": 18500
+    },
+    {
+      "epoch": 2.7252747252747254,
+      "grad_norm": 3.5347176890354604e-05,
+      "learning_rate": 1.8324786324786325e-06,
+      "loss": 0.0,
+      "step": 18600
+    },
+    {
+      "epoch": 2.73992673992674,
+      "grad_norm": 6.854772072983906e-05,
+      "learning_rate": 1.7347985347985349e-06,
+      "loss": 0.0,
+      "step": 18700
+    },
+    {
+      "epoch": 2.7545787545787546,
+      "grad_norm": 5.984567178529687e-05,
+      "learning_rate": 1.6371184371184373e-06,
+      "loss": 0.0,
+      "step": 18800
+    },
+    {
+      "epoch": 2.769230769230769,
+      "grad_norm": 6.427949119824916e-05,
+      "learning_rate": 1.5394383394383396e-06,
+      "loss": 0.0001,
+      "step": 18900
+    },
+    {
+      "epoch": 2.7838827838827838,
+      "grad_norm": 2.7727375709218904e-05,
+      "learning_rate": 1.4417582417582418e-06,
+      "loss": 0.0,
+      "step": 19000
+    },
+    {
+      "epoch": 2.7985347985347984,
+      "grad_norm": 3.604817175073549e-05,
+      "learning_rate": 1.3440781440781442e-06,
+      "loss": 0.0,
+      "step": 19100
+    },
+    {
+      "epoch": 2.813186813186813,
+      "grad_norm": 9.17447468964383e-05,
+      "learning_rate": 1.2463980463980466e-06,
+      "loss": 0.0,
+      "step": 19200
+    },
+    {
+      "epoch": 2.8278388278388276,
+      "grad_norm": 3.2978157832985744e-05,
+      "learning_rate": 1.1487179487179487e-06,
+      "loss": 0.0,
+      "step": 19300
+    },
+    {
+      "epoch": 2.8424908424908426,
+      "grad_norm": 4.4227072066860273e-05,
+      "learning_rate": 1.051037851037851e-06,
+      "loss": 0.0,
+      "step": 19400
+    },
+    {
+      "epoch": 2.857142857142857,
+      "grad_norm": 3.573105641407892e-05,
+      "learning_rate": 9.533577533577535e-07,
+      "loss": 0.0,
+      "step": 19500
+    },
+    {
+      "epoch": 2.871794871794872,
+      "grad_norm": 5.604741090792231e-05,
+      "learning_rate": 8.556776556776559e-07,
+      "loss": 0.0,
+      "step": 19600
+    },
+    {
+      "epoch": 2.8864468864468864,
+      "grad_norm": 6.343449058476835e-05,
+      "learning_rate": 7.57997557997558e-07,
+      "loss": 0.0,
+      "step": 19700
+    },
+    {
+      "epoch": 2.901098901098901,
+      "grad_norm": 2.3594573576701805e-05,
+      "learning_rate": 6.603174603174604e-07,
+      "loss": 0.0,
+      "step": 19800
+    },
+    {
+      "epoch": 2.9157509157509156,
+      "grad_norm": 4.237998291500844e-05,
+      "learning_rate": 5.626373626373627e-07,
+      "loss": 0.0,
+      "step": 19900
+    },
+    {
+      "epoch": 2.9304029304029307,
+      "grad_norm": 4.4342101318761706e-05,
+      "learning_rate": 4.64957264957265e-07,
+      "loss": 0.0,
+      "step": 20000
+    },
+    {
+      "epoch": 2.9450549450549453,
+      "grad_norm": 4.6577501052524894e-05,
+      "learning_rate": 3.672771672771673e-07,
+      "loss": 0.0,
+      "step": 20100
+    },
+    {
+      "epoch": 2.95970695970696,
+      "grad_norm": 3.366880628163926e-05,
+      "learning_rate": 2.695970695970696e-07,
+      "loss": 0.0,
+      "step": 20200
+    },
+    {
+      "epoch": 2.9743589743589745,
+      "grad_norm": 4.93158950121142e-05,
+      "learning_rate": 1.7191697191697192e-07,
+      "loss": 0.0,
+      "step": 20300
+    },
+    {
+      "epoch": 2.989010989010989,
+      "grad_norm": 3.799773912760429e-05,
+      "learning_rate": 7.423687423687424e-08,
+      "loss": 0.0,
+      "step": 20400
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 1.0,
+      "eval_f1": 1.0,
+      "eval_loss": 1.2949521988048218e-06,
+      "eval_precision": 1.0,
+      "eval_recall": 1.0,
+      "eval_runtime": 266.3604,
+      "eval_samples_per_second": 102.493,
+      "eval_steps_per_second": 6.409,
+      "step": 20475
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 20475,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.5970716030629024e+16,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fdc84b0e9c686c061974cd287d3c24839f53f2c3972ac34b3199981e7073d4c6
+size 5368

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff