Upload folder using huggingface_hub
- checkpoint-11000/added_tokens.json +3 -0
- checkpoint-11000/config.json +59 -0
- checkpoint-11000/model.safetensors +3 -0
- checkpoint-11000/optimizer.pt +3 -0
- checkpoint-11000/rng_state.pth +3 -0
- checkpoint-11000/scheduler.pt +3 -0
- checkpoint-11000/special_tokens_map.json +15 -0
- checkpoint-11000/spm.model +3 -0
- checkpoint-11000/tokenizer.json +0 -0
- checkpoint-11000/tokenizer_config.json +59 -0
- checkpoint-11000/trainer_state.json +1610 -0
- checkpoint-11000/training_args.bin +3 -0
- checkpoint-11250/added_tokens.json +3 -0
- checkpoint-11250/config.json +59 -0
- checkpoint-11250/model.safetensors +3 -0
- checkpoint-11250/optimizer.pt +3 -0
- checkpoint-11250/rng_state.pth +3 -0
- checkpoint-11250/scheduler.pt +3 -0
- checkpoint-11250/special_tokens_map.json +15 -0
- checkpoint-11250/spm.model +3 -0
- checkpoint-11250/tokenizer.json +0 -0
- checkpoint-11250/tokenizer_config.json +59 -0
- checkpoint-11250/trainer_state.json +1645 -0
- checkpoint-11250/training_args.bin +3 -0
- config.json +18 -18
- training_args.bin +1 -1
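A commit with this title is normally produced by the huggingface_hub client's upload_folder helper. The snippet below is a minimal sketch of that call, not the exact command behind this commit; the folder path and repo_id are placeholders.

from huggingface_hub import HfApi

# Minimal sketch: push a local training output directory (including the
# checkpoint-11000/ and checkpoint-11250/ subfolders listed above) to a
# model repository on the Hub. folder_path and repo_id are placeholders,
# not values taken from this commit.
api = HfApi()
api.upload_folder(
    folder_path="./output",
    repo_id="your-username/your-model",
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)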
checkpoint-11000/added_tokens.json
ADDED
@@ -0,0 +1,3 @@
+{
+  "[MASK]": 128000
+}
checkpoint-11000/config.json
ADDED
@@ -0,0 +1,59 @@
+{
+  "architectures": [
+    "DebertaV2ForTokenClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 1,
+  "dtype": "float32",
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6",
+    "7": "LABEL_7",
+    "8": "LABEL_8"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6,
+    "LABEL_7": 7,
+    "LABEL_8": 8
+  },
+  "layer_norm_eps": 1e-07,
+  "legacy": true,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 768,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "transformers_version": "4.56.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}
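The config above describes a 12-layer DeBERTa-v2 encoder with a token-classification head over nine generic labels (LABEL_0 through LABEL_8). As a rough sketch, assuming the checkpoint folder has been downloaded locally and sentencepiece is installed for the SPM tokenizer, it can be loaded with the standard transformers auto classes:

from transformers import AutoModelForTokenClassification, AutoTokenizer

# Minimal sketch: load the checkpoint from a local copy of the folder shown
# in this commit. The path is a placeholder for wherever the files live.
model = AutoModelForTokenClassification.from_pretrained("checkpoint-11000")
tokenizer = AutoTokenizer.from_pretrained("checkpoint-11000")

inputs = tokenizer("Fluent in Spanish and certified in AWS.", return_tensors="pt")
outputs = model(**inputs)
print(outputs.logits.shape)  # (1, sequence_length, 9): one score per token per label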
checkpoint-11000/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7be134a213b3f5ba1c8cbd8675288bf98b8eca7b890f57bb1ced689d13b1580d
+size 735378268
checkpoint-11000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb94ae915eb967a324195739cd401341c643a77a1ebd9c3e8e177a721faca63d
+size 1470878283
checkpoint-11000/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27c48ae1baa095d00c34734e0c44a3088c7026668efcbb7c0c18c9c8a0b52854
+size 14645
checkpoint-11000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4465649d300de9496daf05727c4662b51bf21545195893b7cb1a08daed34ad9
+size 1465
checkpoint-11000/special_tokens_map.json
ADDED
@@ -0,0 +1,15 @@
+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
checkpoint-11000/spm.model
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+size 2464616
checkpoint-11000/tokenizer.json
ADDED
The diff for this file is too large to render.
checkpoint-11000/tokenizer_config.json
ADDED
@@ -0,0 +1,59 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128000": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}
checkpoint-11000/trainer_state.json
ADDED
@@ -0,0 +1,1610 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.888888888888889,
+  "eval_steps": 500,
+  "global_step": 11000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {"epoch": 0.022222222222222223, "grad_norm": 0.4169579744338989, "learning_rate": 4.9782222222222224e-05, "loss": 0.2263, "step": 50},
+    {"epoch": 0.044444444444444446, "grad_norm": 0.33507040143013, "learning_rate": 4.956e-05, "loss": 0.0174, "step": 100},
+    {"epoch": 0.06666666666666667, "grad_norm": 0.11501526087522507, "learning_rate": 4.933777777777778e-05, "loss": 0.0101, "step": 150},
+    {"epoch": 0.08888888888888889, "grad_norm": 0.09099342674016953, "learning_rate": 4.911555555555556e-05, "loss": 0.0061, "step": 200},
+    {"epoch": 0.1111111111111111, "grad_norm": 0.08669792860746384, "learning_rate": 4.8893333333333335e-05, "loss": 0.0036, "step": 250},
+    ... (training-log entries with the same fields continue every 50 steps; the loss falls below 0.001 within the first epoch and stays at or near 0.0 thereafter while the learning rate decays linearly) ...
+    {"epoch": 1.0, "eval_loss": 0.0001230358611792326, "eval_report": " precision recall f1-score support\n\nCERTIFICATION 1.00 1.00 1.00 8666\n EDUCATION 1.00 1.00 1.00 10841\n LANGUAGE 1.00 1.00 1.00 4050\n SKILL 1.00 1.00 1.00 14410\n\n micro avg 1.00 1.00 1.00 37967\n macro avg 1.00 1.00 1.00 37967\n weighted avg 1.00 1.00 1.00 37967\n", "eval_runtime": 39.9747, "eval_samples_per_second": 50.032, "eval_steps_per_second": 6.254, "step": 2250},
+    ... (per-step training entries for epoch 2 follow the same pattern) ...
+    {"epoch": 2.0, "eval_loss": 1.410930394740717e-06, "eval_report": " precision recall f1-score support\n\nCERTIFICATION 1.00 1.00 1.00 8666\n EDUCATION 1.00 1.00 1.00 10841\n LANGUAGE 1.00 1.00 1.00 4050\n SKILL 1.00 1.00 1.00 14410\n\n micro avg 1.00 1.00 1.00 37967\n macro avg 1.00 1.00 1.00 37967\n weighted avg 1.00 1.00 1.00 37967\n", "eval_runtime": 39.342, "eval_samples_per_second": 50.836, "eval_steps_per_second": 6.355, "step": 4500},
+    ... (per-step training entries for epoch 3 follow the same pattern) ...
+    {"epoch": 3.0, "eval_loss": 9.946712680175551e-07, "eval_report": " precision recall f1-score support\n\nCERTIFICATION 1.00 1.00 1.00 8666\n EDUCATION 1.00 1.00 1.00 10841\n LANGUAGE 1.00 1.00 1.00 4050\n SKILL 1.00 1.00 1.00 14410\n\n micro avg 1.00 1.00 1.00 37967\n macro avg 1.00 1.00 1.00 37967\n weighted avg 1.00 1.00 1.00 37967\n", "eval_runtime": 39.3914, "eval_samples_per_second": 50.773, "eval_steps_per_second": 6.347, "step": 6750},
+    ... (per-step training entries for epoch 4 follow the same pattern) ...
+    {"epoch": 4.0, "eval_loss": 3.7440361211338313e-07, "eval_report": " precision recall f1-score support\n\nCERTIFICATION 1.00 1.00 1.00 8666\n EDUCATION 1.00 1.00 1.00 10841\n LANGUAGE 1.00 1.00 1.00 4050\n SKILL 1.00 1.00 1.00 14410\n\n micro avg 1.00 1.00 1.00 37967\n macro avg 1.00 1.00 1.00 37967\n weighted avg 1.00 1.00 1.00 37967\n", "eval_runtime": 39.2169, "eval_samples_per_second": 50.998, "eval_steps_per_second": 6.375, "step": 9000},
+    {"epoch": 4.022222222222222, "grad_norm": 4.486538819037378e-05, "learning_rate": 9.782222222222224e-06, "loss": 0.0, "step": 9050},
+    ... (entries continue every 50 steps up to the last one visible in the captured diff) ...
+    {"epoch": 4.111111111111111, "grad_norm": 3.422492227400653e-05, "learning_rate": 8.893333333333333e-06, "loss": 0.0, "step": 9250}
+... (the rendered diff is cut off here, around line 1342 of 1610; the remainder of log_history and the closing fields of trainer_state.json are not shown in this capture) ...
+
},
|
| 1343 |
+
{
|
| 1344 |
+
"epoch": 4.133333333333334,
|
| 1345 |
+
"grad_norm": 3.486883360892534e-05,
|
| 1346 |
+
"learning_rate": 8.671111111111111e-06,
|
| 1347 |
+
"loss": 0.0,
|
| 1348 |
+
"step": 9300
|
| 1349 |
+
},
|
| 1350 |
+
{
|
| 1351 |
+
"epoch": 4.155555555555556,
|
| 1352 |
+
"grad_norm": 3.659820868051611e-05,
|
| 1353 |
+
"learning_rate": 8.448888888888889e-06,
|
| 1354 |
+
"loss": 0.0,
|
| 1355 |
+
"step": 9350
|
| 1356 |
+
},
|
| 1357 |
+
{
|
| 1358 |
+
"epoch": 4.177777777777778,
|
| 1359 |
+
"grad_norm": 3.242024467908777e-05,
|
| 1360 |
+
"learning_rate": 8.226666666666667e-06,
|
| 1361 |
+
"loss": 0.0,
|
| 1362 |
+
"step": 9400
|
| 1363 |
+
},
|
| 1364 |
+
{
|
| 1365 |
+
"epoch": 4.2,
|
| 1366 |
+
"grad_norm": 0.0001435010926797986,
|
| 1367 |
+
"learning_rate": 8.004444444444445e-06,
|
| 1368 |
+
"loss": 0.0,
|
| 1369 |
+
"step": 9450
|
| 1370 |
+
},
|
| 1371 |
+
{
|
| 1372 |
+
"epoch": 4.222222222222222,
|
| 1373 |
+
"grad_norm": 3.879176438204013e-05,
|
| 1374 |
+
"learning_rate": 7.782222222222223e-06,
|
| 1375 |
+
"loss": 0.0,
|
| 1376 |
+
"step": 9500
|
| 1377 |
+
},
|
| 1378 |
+
{
|
| 1379 |
+
"epoch": 4.2444444444444445,
|
| 1380 |
+
"grad_norm": 3.1664359994465485e-05,
|
| 1381 |
+
"learning_rate": 7.5600000000000005e-06,
|
| 1382 |
+
"loss": 0.0,
|
| 1383 |
+
"step": 9550
|
| 1384 |
+
},
|
| 1385 |
+
{
|
| 1386 |
+
"epoch": 4.266666666666667,
|
| 1387 |
+
"grad_norm": 3.302018012618646e-05,
|
| 1388 |
+
"learning_rate": 7.337777777777778e-06,
|
| 1389 |
+
"loss": 0.0,
|
| 1390 |
+
"step": 9600
|
| 1391 |
+
},
|
| 1392 |
+
{
|
| 1393 |
+
"epoch": 4.288888888888889,
|
| 1394 |
+
"grad_norm": 0.00034453265834599733,
|
| 1395 |
+
"learning_rate": 7.115555555555556e-06,
|
| 1396 |
+
"loss": 0.0,
|
| 1397 |
+
"step": 9650
|
| 1398 |
+
},
|
| 1399 |
+
{
|
| 1400 |
+
"epoch": 4.311111111111111,
|
| 1401 |
+
"grad_norm": 3.3918331610038877e-05,
|
| 1402 |
+
"learning_rate": 6.893333333333334e-06,
|
| 1403 |
+
"loss": 0.0,
|
| 1404 |
+
"step": 9700
|
| 1405 |
+
},
|
| 1406 |
+
{
|
| 1407 |
+
"epoch": 4.333333333333333,
|
| 1408 |
+
"grad_norm": 3.994768121629022e-05,
|
| 1409 |
+
"learning_rate": 6.671111111111111e-06,
|
| 1410 |
+
"loss": 0.0,
|
| 1411 |
+
"step": 9750
|
| 1412 |
+
},
|
| 1413 |
+
{
|
| 1414 |
+
"epoch": 4.355555555555555,
|
| 1415 |
+
"grad_norm": 7.467544492101297e-05,
|
| 1416 |
+
"learning_rate": 6.448888888888889e-06,
|
| 1417 |
+
"loss": 0.0,
|
| 1418 |
+
"step": 9800
|
| 1419 |
+
},
|
| 1420 |
+
{
|
| 1421 |
+
"epoch": 4.377777777777778,
|
| 1422 |
+
"grad_norm": 3.103712879237719e-05,
|
| 1423 |
+
"learning_rate": 6.226666666666667e-06,
|
| 1424 |
+
"loss": 0.0,
|
| 1425 |
+
"step": 9850
|
| 1426 |
+
},
|
| 1427 |
+
{
|
| 1428 |
+
"epoch": 4.4,
|
| 1429 |
+
"grad_norm": 4.751016604132019e-05,
|
| 1430 |
+
"learning_rate": 6.0044444444444445e-06,
|
| 1431 |
+
"loss": 0.0,
|
| 1432 |
+
"step": 9900
|
| 1433 |
+
},
|
| 1434 |
+
{
|
| 1435 |
+
"epoch": 4.4222222222222225,
|
| 1436 |
+
"grad_norm": 9.309023153036833e-05,
|
| 1437 |
+
"learning_rate": 5.782222222222222e-06,
|
| 1438 |
+
"loss": 0.0,
|
| 1439 |
+
"step": 9950
|
| 1440 |
+
},
|
| 1441 |
+
{
|
| 1442 |
+
"epoch": 4.444444444444445,
|
| 1443 |
+
"grad_norm": 3.0390472602448426e-05,
|
| 1444 |
+
"learning_rate": 5.56e-06,
|
| 1445 |
+
"loss": 0.0,
|
| 1446 |
+
"step": 10000
|
| 1447 |
+
},
|
| 1448 |
+
{
|
| 1449 |
+
"epoch": 4.466666666666667,
|
| 1450 |
+
"grad_norm": 3.085477146669291e-05,
|
| 1451 |
+
"learning_rate": 5.337777777777778e-06,
|
| 1452 |
+
"loss": 0.0,
|
| 1453 |
+
"step": 10050
|
| 1454 |
+
},
|
| 1455 |
+
{
|
| 1456 |
+
"epoch": 4.488888888888889,
|
| 1457 |
+
"grad_norm": 3.4081851481460035e-05,
|
| 1458 |
+
"learning_rate": 5.115555555555556e-06,
|
| 1459 |
+
"loss": 0.0,
|
| 1460 |
+
"step": 10100
|
| 1461 |
+
},
|
| 1462 |
+
{
|
| 1463 |
+
"epoch": 4.511111111111111,
|
| 1464 |
+
"grad_norm": 3.805098822340369e-05,
|
| 1465 |
+
"learning_rate": 4.893333333333334e-06,
|
| 1466 |
+
"loss": 0.0,
|
| 1467 |
+
"step": 10150
|
| 1468 |
+
},
|
| 1469 |
+
{
|
| 1470 |
+
"epoch": 4.533333333333333,
|
| 1471 |
+
"grad_norm": 3.679243309306912e-05,
|
| 1472 |
+
"learning_rate": 4.6711111111111115e-06,
|
| 1473 |
+
"loss": 0.0,
|
| 1474 |
+
"step": 10200
|
| 1475 |
+
},
|
| 1476 |
+
{
|
| 1477 |
+
"epoch": 4.555555555555555,
|
| 1478 |
+
"grad_norm": 3.425304385018535e-05,
|
| 1479 |
+
"learning_rate": 4.448888888888889e-06,
|
| 1480 |
+
"loss": 0.0,
|
| 1481 |
+
"step": 10250
|
| 1482 |
+
},
|
| 1483 |
+
{
|
| 1484 |
+
"epoch": 4.5777777777777775,
|
| 1485 |
+
"grad_norm": 2.751518513832707e-05,
|
| 1486 |
+
"learning_rate": 4.226666666666667e-06,
|
| 1487 |
+
"loss": 0.0,
|
| 1488 |
+
"step": 10300
|
| 1489 |
+
},
|
| 1490 |
+
{
|
| 1491 |
+
"epoch": 4.6,
|
| 1492 |
+
"grad_norm": 2.8532382202683948e-05,
|
| 1493 |
+
"learning_rate": 4.004444444444445e-06,
|
| 1494 |
+
"loss": 0.0,
|
| 1495 |
+
"step": 10350
|
| 1496 |
+
},
|
| 1497 |
+
{
|
| 1498 |
+
"epoch": 4.622222222222222,
|
| 1499 |
+
"grad_norm": 2.9013817766099237e-05,
|
| 1500 |
+
"learning_rate": 3.7822222222222224e-06,
|
| 1501 |
+
"loss": 0.0,
|
| 1502 |
+
"step": 10400
|
| 1503 |
+
},
|
| 1504 |
+
{
|
| 1505 |
+
"epoch": 4.644444444444445,
|
| 1506 |
+
"grad_norm": 3.07061527564656e-05,
|
| 1507 |
+
"learning_rate": 3.5600000000000002e-06,
|
| 1508 |
+
"loss": 0.0,
|
| 1509 |
+
"step": 10450
|
| 1510 |
+
},
|
| 1511 |
+
{
|
| 1512 |
+
"epoch": 4.666666666666667,
|
| 1513 |
+
"grad_norm": 2.944734660559334e-05,
|
| 1514 |
+
"learning_rate": 3.337777777777778e-06,
|
| 1515 |
+
"loss": 0.0,
|
| 1516 |
+
"step": 10500
|
| 1517 |
+
},
|
| 1518 |
+
{
|
| 1519 |
+
"epoch": 4.688888888888889,
|
| 1520 |
+
"grad_norm": 2.6814725060830824e-05,
|
| 1521 |
+
"learning_rate": 3.1155555555555555e-06,
|
| 1522 |
+
"loss": 0.0,
|
| 1523 |
+
"step": 10550
|
| 1524 |
+
},
|
| 1525 |
+
{
|
| 1526 |
+
"epoch": 4.711111111111111,
|
| 1527 |
+
"grad_norm": 3.8631915231235325e-05,
|
| 1528 |
+
"learning_rate": 2.8933333333333333e-06,
|
| 1529 |
+
"loss": 0.0,
|
| 1530 |
+
"step": 10600
|
| 1531 |
+
},
|
| 1532 |
+
{
|
| 1533 |
+
"epoch": 4.733333333333333,
|
| 1534 |
+
"grad_norm": 4.212988278595731e-05,
|
| 1535 |
+
"learning_rate": 2.6711111111111116e-06,
|
| 1536 |
+
"loss": 0.0,
|
| 1537 |
+
"step": 10650
|
| 1538 |
+
},
|
| 1539 |
+
{
|
| 1540 |
+
"epoch": 4.7555555555555555,
|
| 1541 |
+
"grad_norm": 2.7735224648495205e-05,
|
| 1542 |
+
"learning_rate": 2.448888888888889e-06,
|
| 1543 |
+
"loss": 0.0,
|
| 1544 |
+
"step": 10700
|
| 1545 |
+
},
|
| 1546 |
+
{
|
| 1547 |
+
"epoch": 4.777777777777778,
|
| 1548 |
+
"grad_norm": 3.2194642699323595e-05,
|
| 1549 |
+
"learning_rate": 2.226666666666667e-06,
|
| 1550 |
+
"loss": 0.0,
|
| 1551 |
+
"step": 10750
|
| 1552 |
+
},
|
| 1553 |
+
{
|
| 1554 |
+
"epoch": 4.8,
|
| 1555 |
+
"grad_norm": 2.7552601750358008e-05,
|
| 1556 |
+
"learning_rate": 2.0044444444444446e-06,
|
| 1557 |
+
"loss": 0.0,
|
| 1558 |
+
"step": 10800
|
| 1559 |
+
},
|
| 1560 |
+
{
|
| 1561 |
+
"epoch": 4.822222222222222,
|
| 1562 |
+
"grad_norm": 3.464681503828615e-05,
|
| 1563 |
+
"learning_rate": 1.7822222222222223e-06,
|
| 1564 |
+
"loss": 0.0,
|
| 1565 |
+
"step": 10850
|
| 1566 |
+
},
|
| 1567 |
+
{
|
| 1568 |
+
"epoch": 4.844444444444444,
|
| 1569 |
+
"grad_norm": 2.8678237868007272e-05,
|
| 1570 |
+
"learning_rate": 1.56e-06,
|
| 1571 |
+
"loss": 0.0,
|
| 1572 |
+
"step": 10900
|
| 1573 |
+
},
|
| 1574 |
+
{
|
| 1575 |
+
"epoch": 4.866666666666667,
|
| 1576 |
+
"grad_norm": 2.6989377147401683e-05,
|
| 1577 |
+
"learning_rate": 1.337777777777778e-06,
|
| 1578 |
+
"loss": 0.0,
|
| 1579 |
+
"step": 10950
|
| 1580 |
+
},
|
| 1581 |
+
{
|
| 1582 |
+
"epoch": 4.888888888888889,
|
| 1583 |
+
"grad_norm": 2.9060267479508184e-05,
|
| 1584 |
+
"learning_rate": 1.1155555555555556e-06,
|
| 1585 |
+
"loss": 0.0,
|
| 1586 |
+
"step": 11000
|
| 1587 |
+
}
|
| 1588 |
+
],
|
| 1589 |
+
"logging_steps": 50,
|
| 1590 |
+
"max_steps": 11250,
|
| 1591 |
+
"num_input_tokens_seen": 0,
|
| 1592 |
+
"num_train_epochs": 5,
|
| 1593 |
+
"save_steps": 500,
|
| 1594 |
+
"stateful_callbacks": {
|
| 1595 |
+
"TrainerControl": {
|
| 1596 |
+
"args": {
|
| 1597 |
+
"should_epoch_stop": false,
|
| 1598 |
+
"should_evaluate": false,
|
| 1599 |
+
"should_log": false,
|
| 1600 |
+
"should_save": true,
|
| 1601 |
+
"should_training_stop": false
|
| 1602 |
+
},
|
| 1603 |
+
"attributes": {}
|
| 1604 |
+
}
|
| 1605 |
+
},
|
| 1606 |
+
"total_flos": 1.0134876111806016e+16,
|
| 1607 |
+
"train_batch_size": 8,
|
| 1608 |
+
"trial_name": null,
|
| 1609 |
+
"trial_params": null
|
| 1610 |
+
}
|
checkpoint-11000/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2437d3569274a4022188190da6f34872c3e1e86886e244e0bd97efb2e6b5384c
|
| 3 |
+
size 5713
|
checkpoint-11250/added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
{
|
| 2 |
+
"[MASK]": 128000
|
| 3 |
+
}
|
checkpoint-11250/config.json
ADDED
|
@@ -0,0 +1,59 @@
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DebertaV2ForTokenClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 1,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "LABEL_0",
|
| 14 |
+
"1": "LABEL_1",
|
| 15 |
+
"2": "LABEL_2",
|
| 16 |
+
"3": "LABEL_3",
|
| 17 |
+
"4": "LABEL_4",
|
| 18 |
+
"5": "LABEL_5",
|
| 19 |
+
"6": "LABEL_6",
|
| 20 |
+
"7": "LABEL_7",
|
| 21 |
+
"8": "LABEL_8"
|
| 22 |
+
},
|
| 23 |
+
"initializer_range": 0.02,
|
| 24 |
+
"intermediate_size": 3072,
|
| 25 |
+
"label2id": {
|
| 26 |
+
"LABEL_0": 0,
|
| 27 |
+
"LABEL_1": 1,
|
| 28 |
+
"LABEL_2": 2,
|
| 29 |
+
"LABEL_3": 3,
|
| 30 |
+
"LABEL_4": 4,
|
| 31 |
+
"LABEL_5": 5,
|
| 32 |
+
"LABEL_6": 6,
|
| 33 |
+
"LABEL_7": 7,
|
| 34 |
+
"LABEL_8": 8
|
| 35 |
+
},
|
| 36 |
+
"layer_norm_eps": 1e-07,
|
| 37 |
+
"legacy": true,
|
| 38 |
+
"max_position_embeddings": 512,
|
| 39 |
+
"max_relative_positions": -1,
|
| 40 |
+
"model_type": "deberta-v2",
|
| 41 |
+
"norm_rel_ebd": "layer_norm",
|
| 42 |
+
"num_attention_heads": 12,
|
| 43 |
+
"num_hidden_layers": 12,
|
| 44 |
+
"pad_token_id": 0,
|
| 45 |
+
"pooler_dropout": 0,
|
| 46 |
+
"pooler_hidden_act": "gelu",
|
| 47 |
+
"pooler_hidden_size": 768,
|
| 48 |
+
"pos_att_type": [
|
| 49 |
+
"p2c",
|
| 50 |
+
"c2p"
|
| 51 |
+
],
|
| 52 |
+
"position_biased_input": false,
|
| 53 |
+
"position_buckets": 256,
|
| 54 |
+
"relative_attention": true,
|
| 55 |
+
"share_att_key": true,
|
| 56 |
+
"transformers_version": "4.56.1",
|
| 57 |
+
"type_vocab_size": 0,
|
| 58 |
+
"vocab_size": 128100
|
| 59 |
+
}
|
checkpoint-11250/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10016fab24ea554c4127ed2f5363f71363d520366f4fd4acf3d6516d522b1f92
|
| 3 |
+
size 735378268
|
checkpoint-11250/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea08f3db7e2403bbb8948d7a24efdb4847c0f8034ad875aefcaf9cb0ed459882
|
| 3 |
+
size 1470878283
|
checkpoint-11250/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8c80d3c0c1f1e6f97164c4dd8e1ddd46ae7e23eed7be8b3cf781bafd6fbc2e4
|
| 3 |
+
size 14645
|
checkpoint-11250/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ba182a6b24992ed415540c66b4789d972bacea714ff5a20fad3bc4d2f705ba8
|
| 3 |
+
size 1465
|
checkpoint-11250/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"eos_token": "[SEP]",
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"sep_token": "[SEP]",
|
| 8 |
+
"unk_token": {
|
| 9 |
+
"content": "[UNK]",
|
| 10 |
+
"lstrip": false,
|
| 11 |
+
"normalized": true,
|
| 12 |
+
"rstrip": false,
|
| 13 |
+
"single_word": false
|
| 14 |
+
}
|
| 15 |
+
}
|
checkpoint-11250/spm.model
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
|
| 3 |
+
size 2464616
|
checkpoint-11250/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
checkpoint-11250/tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "[CLS]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "[SEP]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "[UNK]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"128000": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "[CLS]",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "[CLS]",
|
| 47 |
+
"do_lower_case": false,
|
| 48 |
+
"eos_token": "[SEP]",
|
| 49 |
+
"extra_special_tokens": {},
|
| 50 |
+
"mask_token": "[MASK]",
|
| 51 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 52 |
+
"pad_token": "[PAD]",
|
| 53 |
+
"sep_token": "[SEP]",
|
| 54 |
+
"sp_model_kwargs": {},
|
| 55 |
+
"split_by_punct": false,
|
| 56 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 57 |
+
"unk_token": "[UNK]",
|
| 58 |
+
"vocab_type": "spm"
|
| 59 |
+
}
|
checkpoint-11250/trainer_state.json
ADDED
|
@@ -0,0 +1,1645 @@
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": null,
|
| 3 |
+
"best_metric": null,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 5.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 11250,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.022222222222222223,
|
| 14 |
+
"grad_norm": 0.4169579744338989,
|
| 15 |
+
"learning_rate": 4.9782222222222224e-05,
|
| 16 |
+
"loss": 0.2263,
|
| 17 |
+
"step": 50
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.044444444444444446,
|
| 21 |
+
"grad_norm": 0.33507040143013,
|
| 22 |
+
"learning_rate": 4.956e-05,
|
| 23 |
+
"loss": 0.0174,
|
| 24 |
+
"step": 100
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.06666666666666667,
|
| 28 |
+
"grad_norm": 0.11501526087522507,
|
| 29 |
+
"learning_rate": 4.933777777777778e-05,
|
| 30 |
+
"loss": 0.0101,
|
| 31 |
+
"step": 150
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.08888888888888889,
|
| 35 |
+
"grad_norm": 0.09099342674016953,
|
| 36 |
+
"learning_rate": 4.911555555555556e-05,
|
| 37 |
+
"loss": 0.0061,
|
| 38 |
+
"step": 200
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 0.1111111111111111,
|
| 42 |
+
"grad_norm": 0.08669792860746384,
|
| 43 |
+
"learning_rate": 4.8893333333333335e-05,
|
| 44 |
+
"loss": 0.0036,
|
| 45 |
+
"step": 250
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 0.13333333333333333,
|
| 49 |
+
"grad_norm": 0.07038326561450958,
|
| 50 |
+
"learning_rate": 4.867111111111111e-05,
|
| 51 |
+
"loss": 0.0037,
|
| 52 |
+
"step": 300
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"epoch": 0.15555555555555556,
|
| 56 |
+
"grad_norm": 0.08929373323917389,
|
| 57 |
+
"learning_rate": 4.8448888888888894e-05,
|
| 58 |
+
"loss": 0.0025,
|
| 59 |
+
"step": 350
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"epoch": 0.17777777777777778,
|
| 63 |
+
"grad_norm": 0.010555864311754704,
|
| 64 |
+
"learning_rate": 4.822666666666667e-05,
|
| 65 |
+
"loss": 0.0011,
|
| 66 |
+
"step": 400
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"epoch": 0.2,
|
| 70 |
+
"grad_norm": 0.12513870000839233,
|
| 71 |
+
"learning_rate": 4.8004444444444446e-05,
|
| 72 |
+
"loss": 0.0014,
|
| 73 |
+
"step": 450
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"epoch": 0.2222222222222222,
|
| 77 |
+
"grad_norm": 0.008826244622468948,
|
| 78 |
+
"learning_rate": 4.778222222222222e-05,
|
| 79 |
+
"loss": 0.0009,
|
| 80 |
+
"step": 500
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"epoch": 0.24444444444444444,
|
| 84 |
+
"grad_norm": 0.3243519067764282,
|
| 85 |
+
"learning_rate": 4.7560000000000005e-05,
|
| 86 |
+
"loss": 0.0003,
|
| 87 |
+
"step": 550
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"epoch": 0.26666666666666666,
|
| 91 |
+
"grad_norm": 0.013048608787357807,
|
| 92 |
+
"learning_rate": 4.733777777777778e-05,
|
| 93 |
+
"loss": 0.003,
|
| 94 |
+
"step": 600
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"epoch": 0.28888888888888886,
|
| 98 |
+
"grad_norm": 0.004392141476273537,
|
| 99 |
+
"learning_rate": 4.711555555555556e-05,
|
| 100 |
+
"loss": 0.0017,
|
| 101 |
+
"step": 650
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"epoch": 0.3111111111111111,
|
| 105 |
+
"grad_norm": 0.009453477337956429,
|
| 106 |
+
"learning_rate": 4.6893333333333334e-05,
|
| 107 |
+
"loss": 0.0007,
|
| 108 |
+
"step": 700
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"epoch": 0.3333333333333333,
|
| 112 |
+
"grad_norm": 0.29423239827156067,
|
| 113 |
+
"learning_rate": 4.667111111111112e-05,
|
| 114 |
+
"loss": 0.0004,
|
| 115 |
+
"step": 750
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 0.35555555555555557,
|
| 119 |
+
"grad_norm": 0.007287424057722092,
|
| 120 |
+
"learning_rate": 4.644888888888889e-05,
|
| 121 |
+
"loss": 0.0004,
|
| 122 |
+
"step": 800
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"epoch": 0.37777777777777777,
|
| 126 |
+
"grad_norm": 0.006468615494668484,
|
| 127 |
+
"learning_rate": 4.622666666666667e-05,
|
| 128 |
+
"loss": 0.001,
|
| 129 |
+
"step": 850
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"epoch": 0.4,
|
| 133 |
+
"grad_norm": 0.0032176165841519833,
|
| 134 |
+
"learning_rate": 4.6004444444444445e-05,
|
| 135 |
+
"loss": 0.0013,
|
| 136 |
+
"step": 900
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"epoch": 0.4222222222222222,
|
| 140 |
+
"grad_norm": 0.0045009879395365715,
|
| 141 |
+
"learning_rate": 4.578222222222223e-05,
|
| 142 |
+
"loss": 0.0009,
|
| 143 |
+
"step": 950
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"epoch": 0.4444444444444444,
|
| 147 |
+
"grad_norm": 0.04195983707904816,
|
| 148 |
+
"learning_rate": 4.5560000000000004e-05,
|
| 149 |
+
"loss": 0.0002,
|
| 150 |
+
"step": 1000
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"epoch": 0.4666666666666667,
|
| 154 |
+
"grad_norm": 0.0009760915418155491,
|
| 155 |
+
"learning_rate": 4.533777777777778e-05,
|
| 156 |
+
"loss": 0.0001,
|
| 157 |
+
"step": 1050
|
| 158 |
+
},
|
| 159 |
+
{
|
| 160 |
+
"epoch": 0.4888888888888889,
|
| 161 |
+
"grad_norm": 0.0008424059487879276,
|
| 162 |
+
"learning_rate": 4.5115555555555557e-05,
|
| 163 |
+
"loss": 0.0001,
|
| 164 |
+
"step": 1100
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"epoch": 0.5111111111111111,
|
| 168 |
+
"grad_norm": 0.001366421696729958,
|
| 169 |
+
"learning_rate": 4.489333333333334e-05,
|
| 170 |
+
"loss": 0.0001,
|
| 171 |
+
"step": 1150
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"epoch": 0.5333333333333333,
|
| 175 |
+
"grad_norm": 0.0733928307890892,
|
| 176 |
+
"learning_rate": 4.4671111111111116e-05,
|
| 177 |
+
"loss": 0.0001,
|
| 178 |
+
"step": 1200
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"epoch": 0.5555555555555556,
|
| 182 |
+
"grad_norm": 0.005551271606236696,
|
| 183 |
+
"learning_rate": 4.444888888888889e-05,
|
| 184 |
+
"loss": 0.0008,
|
| 185 |
+
"step": 1250
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"epoch": 0.5777777777777777,
|
| 189 |
+
"grad_norm": 0.0026340284384787083,
|
| 190 |
+
"learning_rate": 4.422666666666667e-05,
|
| 191 |
+
"loss": 0.0002,
|
| 192 |
+
"step": 1300
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"epoch": 0.6,
|
| 196 |
+
"grad_norm": 0.002755340188741684,
|
| 197 |
+
"learning_rate": 4.400444444444445e-05,
|
| 198 |
+
"loss": 0.0001,
|
| 199 |
+
"step": 1350
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"epoch": 0.6222222222222222,
|
| 203 |
+
"grad_norm": 0.0008212323882617056,
|
| 204 |
+
"learning_rate": 4.378222222222223e-05,
|
| 205 |
+
"loss": 0.0,
|
| 206 |
+
"step": 1400
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"epoch": 0.6444444444444445,
|
| 210 |
+
"grad_norm": 0.0005921730189584196,
|
| 211 |
+
"learning_rate": 4.356e-05,
|
| 212 |
+
"loss": 0.0,
|
| 213 |
+
"step": 1450
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"epoch": 0.6666666666666666,
|
| 217 |
+
"grad_norm": 0.0008460666285827756,
|
| 218 |
+
"learning_rate": 4.333777777777778e-05,
|
| 219 |
+
"loss": 0.0001,
|
| 220 |
+
"step": 1500
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"epoch": 0.6888888888888889,
|
| 224 |
+
"grad_norm": 0.0006111777038313448,
|
| 225 |
+
"learning_rate": 4.311555555555556e-05,
|
| 226 |
+
"loss": 0.0,
|
| 227 |
+
"step": 1550
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"epoch": 0.7111111111111111,
|
| 231 |
+
"grad_norm": 0.00046472539543174207,
|
| 232 |
+
"learning_rate": 4.289333333333334e-05,
|
| 233 |
+
"loss": 0.0,
|
| 234 |
+
"step": 1600
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"epoch": 0.7333333333333333,
|
| 238 |
+
"grad_norm": 0.0007363149197772145,
|
| 239 |
+
"learning_rate": 4.2671111111111114e-05,
|
| 240 |
+
"loss": 0.0,
|
| 241 |
+
"step": 1650
|
| 242 |
+
},
|
| 243 |
+
{
|
| 244 |
+
"epoch": 0.7555555555555555,
|
| 245 |
+
"grad_norm": 0.0006023353198543191,
|
| 246 |
+
"learning_rate": 4.244888888888889e-05,
|
| 247 |
+
"loss": 0.0,
|
| 248 |
+
"step": 1700
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"epoch": 0.7777777777777778,
|
| 252 |
+
"grad_norm": 0.0004273591039236635,
|
| 253 |
+
"learning_rate": 4.222666666666667e-05,
|
| 254 |
+
"loss": 0.0,
|
| 255 |
+
"step": 1750
|
| 256 |
+
},
|
| 257 |
+
{
|
| 258 |
+
"epoch": 0.8,
|
| 259 |
+
"grad_norm": 0.0006913533434271812,
|
| 260 |
+
"learning_rate": 4.200444444444445e-05,
|
| 261 |
+
"loss": 0.0,
|
| 262 |
+
"step": 1800
|
| 263 |
+
},
|
| 264 |
+
{
|
| 265 |
+
"epoch": 0.8222222222222222,
|
| 266 |
+
"grad_norm": 0.0014112872304394841,
|
| 267 |
+
"learning_rate": 4.1782222222222226e-05,
|
| 268 |
+
"loss": 0.0002,
|
| 269 |
+
"step": 1850
|
| 270 |
+
},
|
| 271 |
+
{
|
| 272 |
+
"epoch": 0.8444444444444444,
|
| 273 |
+
"grad_norm": 0.0024269793648272753,
|
| 274 |
+
"learning_rate": 4.156e-05,
|
| 275 |
+
"loss": 0.0005,
|
| 276 |
+
"step": 1900
|
| 277 |
+
},
|
| 278 |
+
{
|
| 279 |
+
"epoch": 0.8666666666666667,
|
| 280 |
+
"grad_norm": 0.46272382140159607,
|
| 281 |
+
"learning_rate": 4.133777777777778e-05,
|
| 282 |
+
"loss": 0.0013,
|
| 283 |
+
"step": 1950
|
| 284 |
+
},
|
| 285 |
+
{
|
| 286 |
+
"epoch": 0.8888888888888888,
|
| 287 |
+
"grad_norm": 0.006871496792882681,
|
| 288 |
+
"learning_rate": 4.1115555555555554e-05,
|
| 289 |
+
"loss": 0.001,
|
| 290 |
+
"step": 2000
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"epoch": 0.9111111111111111,
|
| 294 |
+
"grad_norm": 0.0022507584653794765,
|
| 295 |
+
"learning_rate": 4.089333333333333e-05,
|
| 296 |
+
"loss": 0.001,
|
| 297 |
+
"step": 2050
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"epoch": 0.9333333333333333,
|
| 301 |
+
"grad_norm": 0.0077248550951480865,
|
| 302 |
+
"learning_rate": 4.067111111111111e-05,
|
| 303 |
+
"loss": 0.0005,
|
| 304 |
+
"step": 2100
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"epoch": 0.9555555555555556,
|
| 308 |
+
"grad_norm": 0.0006559474277310073,
|
| 309 |
+
"learning_rate": 4.044888888888889e-05,
|
| 310 |
+
"loss": 0.0003,
|
| 311 |
+
"step": 2150
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"epoch": 0.9777777777777777,
|
| 315 |
+
"grad_norm": 0.000827161071356386,
|
| 316 |
+
"learning_rate": 4.0226666666666666e-05,
|
| 317 |
+
"loss": 0.0012,
|
| 318 |
+
"step": 2200
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"epoch": 1.0,
|
| 322 |
+
"grad_norm": 0.22399061918258667,
|
| 323 |
+
"learning_rate": 4.000444444444444e-05,
|
| 324 |
+
"loss": 0.0005,
|
| 325 |
+
"step": 2250
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"epoch": 1.0,
|
| 329 |
+
"eval_loss": 0.0001230358611792326,
|
| 330 |
+
"eval_report": " precision recall f1-score support\n\nCERTIFICATION 1.00 1.00 1.00 8666\n EDUCATION 1.00 1.00 1.00 10841\n LANGUAGE 1.00 1.00 1.00 4050\n SKILL 1.00 1.00 1.00 14410\n\n micro avg 1.00 1.00 1.00 37967\n macro avg 1.00 1.00 1.00 37967\n weighted avg 1.00 1.00 1.00 37967\n",
|
| 331 |
+
"eval_runtime": 39.9747,
|
| 332 |
+
"eval_samples_per_second": 50.032,
|
| 333 |
+
"eval_steps_per_second": 6.254,
|
| 334 |
+
"step": 2250
|
| 335 |
+
},
|
| 336 |
+
{
|
| 337 |
+
"epoch": 1.0222222222222221,
|
| 338 |
+
"grad_norm": 0.0034671323373913765,
|
| 339 |
+
"learning_rate": 3.9782222222222225e-05,
|
| 340 |
+
"loss": 0.0005,
|
| 341 |
+
"step": 2300
|
| 342 |
+
},
|
| 343 |
+
{
|
| 344 |
+
"epoch": 1.0444444444444445,
|
| 345 |
+
"grad_norm": 0.005704512819647789,
|
| 346 |
+
"learning_rate": 3.956e-05,
|
| 347 |
+
"loss": 0.0024,
|
| 348 |
+
"step": 2350
|
| 349 |
+
},
|
| 350 |
+
{
|
| 351 |
+
"epoch": 1.0666666666666667,
|
| 352 |
+
"grad_norm": 0.0016336466651409864,
|
| 353 |
+
"learning_rate": 3.933777777777778e-05,
|
| 354 |
+
"loss": 0.0004,
|
| 355 |
+
"step": 2400
|
| 356 |
+
},
|
| 357 |
+
{
|
| 358 |
+
"epoch": 1.0888888888888888,
|
| 359 |
+
"grad_norm": 0.0009551944676786661,
|
| 360 |
+
"learning_rate": 3.911555555555555e-05,
|
| 361 |
+
"loss": 0.0002,
|
| 362 |
+
"step": 2450
|
| 363 |
+
},
|
| 364 |
+
{
|
| 365 |
+
"epoch": 1.1111111111111112,
|
| 366 |
+
"grad_norm": 0.0004279686836525798,
|
| 367 |
+
"learning_rate": 3.8893333333333336e-05,
|
| 368 |
+
"loss": 0.0001,
|
| 369 |
+
"step": 2500
|
| 370 |
+
},
|
| 371 |
+
{
|
| 372 |
+
"epoch": 1.1333333333333333,
|
| 373 |
+
"grad_norm": 0.00043013374670408666,
|
| 374 |
+
"learning_rate": 3.867111111111111e-05,
|
| 375 |
+
"loss": 0.0001,
|
| 376 |
+
"step": 2550
|
| 377 |
+
},
|
| 378 |
+
{
|
| 379 |
+
"epoch": 1.1555555555555554,
|
| 380 |
+
"grad_norm": 0.00034906569635495543,
|
| 381 |
+
"learning_rate": 3.844888888888889e-05,
|
| 382 |
+
"loss": 0.0,
|
| 383 |
+
"step": 2600
|
| 384 |
+
},
|
| 385 |
+
{
|
| 386 |
+
"epoch": 1.1777777777777778,
|
| 387 |
+
"grad_norm": 0.0014709575334563851,
|
| 388 |
+
"learning_rate": 3.8226666666666664e-05,
|
| 389 |
+
"loss": 0.0002,
|
| 390 |
+
"step": 2650
|
| 391 |
+
},
|
| 392 |
+
{
|
| 393 |
+
"epoch": 1.2,
|
| 394 |
+
"grad_norm": 0.0006022250163368881,
|
| 395 |
+
"learning_rate": 3.800444444444445e-05,
|
| 396 |
+
"loss": 0.0,
|
| 397 |
+
"step": 2700
|
| 398 |
+
},
|
| 399 |
+
{
|
| 400 |
+
"epoch": 1.2222222222222223,
|
| 401 |
+
"grad_norm": 0.000542789523024112,
|
| 402 |
+
"learning_rate": 3.778222222222222e-05,
|
| 403 |
+
"loss": 0.0001,
|
| 404 |
+
"step": 2750
|
| 405 |
+
},
|
| 406 |
+
{
|
| 407 |
+
"epoch": 1.2444444444444445,
|
| 408 |
+
"grad_norm": 0.00048277038149535656,
|
| 409 |
+
"learning_rate": 3.756e-05,
|
| 410 |
+
"loss": 0.0002,
|
| 411 |
+
"step": 2800
|
| 412 |
+
},
|
| 413 |
+
{
|
| 414 |
+
"epoch": 1.2666666666666666,
|
| 415 |
+
"grad_norm": 0.0003071363898925483,
|
| 416 |
+
"learning_rate": 3.7337777777777776e-05,
|
| 417 |
+
"loss": 0.0,
|
| 418 |
+
"step": 2850
|
| 419 |
+
},
|
| 420 |
+
{
|
| 421 |
+
"epoch": 1.2888888888888888,
|
| 422 |
+
"grad_norm": 0.0003033171233255416,
|
| 423 |
+
"learning_rate": 3.711555555555556e-05,
|
| 424 |
+
"loss": 0.0,
|
| 425 |
+
"step": 2900
|
| 426 |
+
},
|
| 427 |
+
{
|
| 428 |
+
"epoch": 1.3111111111111111,
|
| 429 |
+
"grad_norm": 0.001257324474863708,
|
| 430 |
+
"learning_rate": 3.6893333333333335e-05,
|
| 431 |
+
"loss": 0.0011,
|
| 432 |
+
"step": 2950
|
| 433 |
+
},
|
| 434 |
+
{
|
| 435 |
+
"epoch": 1.3333333333333333,
|
| 436 |
+
"grad_norm": 0.0003226712578907609,
|
| 437 |
+
"learning_rate": 3.667111111111111e-05,
|
| 438 |
+
"loss": 0.0002,
|
| 439 |
+
"step": 3000
|
| 440 |
+
},
|
| 441 |
+
{
|
| 442 |
+
"epoch": 1.3555555555555556,
|
| 443 |
+
"grad_norm": 0.00033422038541175425,
|
| 444 |
+
"learning_rate": 3.644888888888889e-05,
|
| 445 |
+
"loss": 0.0001,
|
| 446 |
+
"step": 3050
|
| 447 |
+
},
|
| 448 |
+
{
|
| 449 |
+
"epoch": 1.3777777777777778,
|
| 450 |
+
"grad_norm": 0.0004989306908100843,
|
| 451 |
+
"learning_rate": 3.622666666666667e-05,
|
| 452 |
+
"loss": 0.0,
|
| 453 |
+
"step": 3100
|
| 454 |
+
},
|
| 455 |
+
{
|
| 456 |
+
"epoch": 1.4,
|
| 457 |
+
"grad_norm": 0.0002919055405072868,
|
| 458 |
+
"learning_rate": 3.6004444444444446e-05,
|
| 459 |
+
"loss": 0.0,
|
| 460 |
+
"step": 3150
|
| 461 |
+
},
|
| 462 |
+
{
|
| 463 |
+
"epoch": 1.4222222222222223,
|
| 464 |
+
"grad_norm": 0.00027494438108988106,
|
| 465 |
+
"learning_rate": 3.578222222222222e-05,
|
| 466 |
+
"loss": 0.0,
|
| 467 |
+
"step": 3200
|
| 468 |
+
},
|
| 469 |
+
{
|
| 470 |
+
"epoch": 1.4444444444444444,
|
| 471 |
+
"grad_norm": 0.000610452436376363,
|
| 472 |
+
"learning_rate": 3.5560000000000005e-05,
|
| 473 |
+
"loss": 0.0,
|
| 474 |
+
"step": 3250
|
| 475 |
+
},
|
| 476 |
+
{
|
| 477 |
+
"epoch": 1.4666666666666668,
|
| 478 |
+
"grad_norm": 0.000538027728907764,
|
| 479 |
+
"learning_rate": 3.533777777777778e-05,
|
| 480 |
+
"loss": 0.0,
|
| 481 |
+
"step": 3300
|
| 482 |
+
},
|
| 483 |
+
{
|
| 484 |
+
"epoch": 1.488888888888889,
|
| 485 |
+
"grad_norm": 0.00020039589435327798,
|
| 486 |
+
"learning_rate": 3.511555555555556e-05,
|
| 487 |
+
"loss": 0.0,
|
| 488 |
+
"step": 3350
|
| 489 |
+
},
|
| 490 |
+
{
|
| 491 |
+
"epoch": 1.511111111111111,
|
| 492 |
+
"grad_norm": 0.00018263465608470142,
|
| 493 |
+
"learning_rate": 3.4893333333333334e-05,
|
| 494 |
+
"loss": 0.0,
|
| 495 |
+
"step": 3400
|
| 496 |
+
},
|
| 497 |
+
{
|
| 498 |
+
"epoch": 1.5333333333333332,
|
| 499 |
+
"grad_norm": 0.00019636489741969854,
|
| 500 |
+
"learning_rate": 3.4671111111111116e-05,
|
| 501 |
+
"loss": 0.0,
|
| 502 |
+
"step": 3450
|
| 503 |
+
},
|
| 504 |
+
{
|
| 505 |
+
"epoch": 1.5555555555555556,
|
| 506 |
+
"grad_norm": 0.00028676423244178295,
|
| 507 |
+
"learning_rate": 3.444888888888889e-05,
|
| 508 |
+
"loss": 0.0,
|
| 509 |
+
"step": 3500
|
| 510 |
+
},
|
| 511 |
+
{
|
| 512 |
+
"epoch": 1.5777777777777777,
|
| 513 |
+
"grad_norm": 0.00019564179820008576,
|
| 514 |
+
"learning_rate": 3.422666666666667e-05,
|
| 515 |
+
"loss": 0.0,
|
| 516 |
+
"step": 3550
|
| 517 |
+
},
|
| 518 |
+
{
|
| 519 |
+
"epoch": 1.6,
|
| 520 |
+
"grad_norm": 0.00020010425942018628,
|
| 521 |
+
"learning_rate": 3.4004444444444445e-05,
|
| 522 |
+
"loss": 0.0,
|
| 523 |
+
"step": 3600
|
| 524 |
+
},
|
| 525 |
+
{
|
| 526 |
+
"epoch": 1.6222222222222222,
|
| 527 |
+
"grad_norm": 0.00015156572044361383,
|
| 528 |
+
"learning_rate": 3.378222222222223e-05,
|
| 529 |
+
"loss": 0.0,
|
| 530 |
+
"step": 3650
|
| 531 |
+
},
|
| 532 |
+
{
|
| 533 |
+
"epoch": 1.6444444444444444,
|
| 534 |
+
"grad_norm": 0.0001559116062708199,
|
| 535 |
+
"learning_rate": 3.3560000000000004e-05,
|
| 536 |
+
"loss": 0.0,
|
| 537 |
+
"step": 3700
|
| 538 |
+
},
|
| 539 |
+
{
|
| 540 |
+
"epoch": 1.6666666666666665,
|
| 541 |
+
"grad_norm": 0.00017668222426436841,
|
| 542 |
+
"learning_rate": 3.333777777777778e-05,
|
| 543 |
+
"loss": 0.0,
|
| 544 |
+
"step": 3750
|
| 545 |
+
},
|
| 546 |
+
{
|
| 547 |
+
"epoch": 1.6888888888888889,
|
| 548 |
+
"grad_norm": 0.00015294237527996302,
|
| 549 |
+
"learning_rate": 3.3115555555555556e-05,
|
| 550 |
+
"loss": 0.0,
|
| 551 |
+
"step": 3800
|
| 552 |
+
},
|
| 553 |
+
{
|
| 554 |
+
"epoch": 1.7111111111111112,
|
| 555 |
+
"grad_norm": 0.00020564977603498846,
|
| 556 |
+
"learning_rate": 3.289333333333334e-05,
|
| 557 |
+
"loss": 0.0,
|
| 558 |
+
"step": 3850
|
| 559 |
+
},
|
| 560 |
+
{
|
| 561 |
+
"epoch": 1.7333333333333334,
|
| 562 |
+
"grad_norm": 0.00015193119179457426,
|
| 563 |
+
"learning_rate": 3.2671111111111115e-05,
|
| 564 |
+
"loss": 0.0,
|
| 565 |
+
"step": 3900
|
| 566 |
+
},
|
| 567 |
+
{
|
| 568 |
+
"epoch": 1.7555555555555555,
|
| 569 |
+
"grad_norm": 0.00013517968181986362,
|
| 570 |
+
"learning_rate": 3.244888888888889e-05,
|
| 571 |
+
"loss": 0.0,
|
| 572 |
+
"step": 3950
|
| 573 |
+
},
|
| 574 |
+
{
|
| 575 |
+
"epoch": 1.7777777777777777,
|
| 576 |
+
"grad_norm": 0.00027755036717280746,
|
| 577 |
+
"learning_rate": 3.222666666666667e-05,
|
| 578 |
+
"loss": 0.0,
|
| 579 |
+
"step": 4000
|
| 580 |
+
},
|
| 581 |
+
{
|
| 582 |
+
"epoch": 1.8,
|
| 583 |
+
"grad_norm": 0.0001287544146180153,
|
| 584 |
+
"learning_rate": 3.200444444444445e-05,
|
| 585 |
+
"loss": 0.0,
|
| 586 |
+
"step": 4050
|
| 587 |
+
},
|
| 588 |
+
{
|
| 589 |
+
"epoch": 1.8222222222222222,
|
| 590 |
+
"grad_norm": 0.0001332290848949924,
|
| 591 |
+
"learning_rate": 3.178222222222223e-05,
|
| 592 |
+
"loss": 0.0,
|
| 593 |
+
"step": 4100
|
| 594 |
+
},
|
| 595 |
+
{
|
| 596 |
+
"epoch": 1.8444444444444446,
|
| 597 |
+
"grad_norm": 0.00012569426326081157,
|
| 598 |
+
"learning_rate": 3.156e-05,
|
| 599 |
+
"loss": 0.0,
|
| 600 |
+
"step": 4150
|
| 601 |
+
},
|
| 602 |
+
{
|
| 603 |
+
"epoch": 1.8666666666666667,
|
| 604 |
+
"grad_norm": 0.00011655821435851976,
|
| 605 |
+
"learning_rate": 3.133777777777778e-05,
|
| 606 |
+
"loss": 0.0,
|
| 607 |
+
"step": 4200
|
| 608 |
+
},
|
| 609 |
+
{
|
| 610 |
+
"epoch": 1.8888888888888888,
|
| 611 |
+
"grad_norm": 0.0001188791575259529,
|
| 612 |
+
"learning_rate": 3.111555555555556e-05,
|
| 613 |
+
"loss": 0.0,
|
| 614 |
+
"step": 4250
|
| 615 |
+
},
|
| 616 |
+
{
|
| 617 |
+
"epoch": 1.911111111111111,
|
| 618 |
+
"grad_norm": 0.00010238583490718156,
|
| 619 |
+
"learning_rate": 3.089333333333334e-05,
|
| 620 |
+
"loss": 0.0,
|
| 621 |
+
"step": 4300
|
| 622 |
+
},
|
| 623 |
+
{
|
| 624 |
+
"epoch": 1.9333333333333333,
|
| 625 |
+
"grad_norm": 0.0001092032398446463,
|
| 626 |
+
"learning_rate": 3.0671111111111114e-05,
|
| 627 |
+
"loss": 0.0,
|
| 628 |
+
"step": 4350
|
| 629 |
+
},
|
| 630 |
+
{
|
| 631 |
+
"epoch": 1.9555555555555557,
|
| 632 |
+
"grad_norm": 0.00014390636351890862,
|
| 633 |
+
"learning_rate": 3.0448888888888887e-05,
|
| 634 |
+
"loss": 0.0,
|
| 635 |
+
"step": 4400
|
| 636 |
+
},
|
| 637 |
+
{
|
| 638 |
+
"epoch": 1.9777777777777779,
|
| 639 |
+
"grad_norm": 0.00010023260256275535,
|
| 640 |
+
"learning_rate": 3.022666666666667e-05,
|
| 641 |
+
"loss": 0.0,
|
| 642 |
+
"step": 4450
|
| 643 |
+
},
|
| 644 |
+
{
|
| 645 |
+
"epoch": 2.0,
|
| 646 |
+
"grad_norm": 0.00010638952517183498,
|
| 647 |
+
"learning_rate": 3.0004444444444446e-05,
|
| 648 |
+
"loss": 0.0,
|
| 649 |
+
"step": 4500
|
| 650 |
+
},
|
| 651 |
+
{
|
| 652 |
+
"epoch": 2.0,
|
| 653 |
+
"eval_loss": 1.410930394740717e-06,
|
| 654 |
+
"eval_report": " precision recall f1-score support\n\nCERTIFICATION 1.00 1.00 1.00 8666\n EDUCATION 1.00 1.00 1.00 10841\n LANGUAGE 1.00 1.00 1.00 4050\n SKILL 1.00 1.00 1.00 14410\n\n micro avg 1.00 1.00 1.00 37967\n macro avg 1.00 1.00 1.00 37967\n weighted avg 1.00 1.00 1.00 37967\n",
|
| 655 |
+
"eval_runtime": 39.342,
|
| 656 |
+
"eval_samples_per_second": 50.836,
|
| 657 |
+
"eval_steps_per_second": 6.355,
|
| 658 |
+
"step": 4500
|
| 659 |
+
},
|
| 660 |
+
{
|
| 661 |
+
"epoch": 2.022222222222222,
|
| 662 |
+
"grad_norm": 0.00010032003774540499,
|
| 663 |
+
"learning_rate": 2.9782222222222222e-05,
|
| 664 |
+
"loss": 0.0,
|
| 665 |
+
"step": 4550
|
| 666 |
+
},
|
| 667 |
+
{
|
| 668 |
+
"epoch": 2.0444444444444443,
|
| 669 |
+
"grad_norm": 0.00048742775106802583,
|
| 670 |
+
"learning_rate": 2.9559999999999998e-05,
|
| 671 |
+
"loss": 0.0,
|
| 672 |
+
"step": 4600
|
| 673 |
+
},
|
| 674 |
+
{
|
| 675 |
+
"epoch": 2.066666666666667,
|
| 676 |
+
"grad_norm": 9.531196701573208e-05,
|
| 677 |
+
"learning_rate": 2.933777777777778e-05,
|
| 678 |
+
"loss": 0.0,
|
| 679 |
+
"step": 4650
|
| 680 |
+
},
|
| 681 |
+
{
|
| 682 |
+
"epoch": 2.088888888888889,
|
| 683 |
+
"grad_norm": 9.425494499737397e-05,
|
| 684 |
+
"learning_rate": 2.9115555555555557e-05,
|
| 685 |
+
"loss": 0.0,
|
| 686 |
+
"step": 4700
|
| 687 |
+
},
|
| 688 |
+
{
|
| 689 |
+
"epoch": 2.111111111111111,
|
| 690 |
+
"grad_norm": 0.00011574638483580202,
|
| 691 |
+
"learning_rate": 2.8893333333333333e-05,
|
| 692 |
+
"loss": 0.0,
|
| 693 |
+
"step": 4750
|
| 694 |
+
},
|
| 695 |
+
{
|
| 696 |
+
"epoch": 2.1333333333333333,
|
| 697 |
+
"grad_norm": 9.062536264536902e-05,
|
| 698 |
+
"learning_rate": 2.8671111111111116e-05,
|
| 699 |
+
"loss": 0.0,
|
| 700 |
+
"step": 4800
|
| 701 |
+
},
|
| 702 |
+
{
|
| 703 |
+
"epoch": 2.1555555555555554,
|
| 704 |
+
"grad_norm": 9.913908434100449e-05,
|
| 705 |
+
"learning_rate": 2.8448888888888892e-05,
|
| 706 |
+
"loss": 0.0,
|
| 707 |
+
"step": 4850
|
| 708 |
+
},
|
| 709 |
+
{
|
| 710 |
+
"epoch": 2.1777777777777776,
|
| 711 |
+
"grad_norm": 0.00010020509216701612,
|
| 712 |
+
"learning_rate": 2.822666666666667e-05,
|
| 713 |
+
"loss": 0.0,
|
| 714 |
+
"step": 4900
|
| 715 |
+
},
|
| 716 |
+
{
|
| 717 |
+
"epoch": 2.2,
|
| 718 |
+
"grad_norm": 0.00012128752132412046,
|
| 719 |
+
"learning_rate": 2.8004444444444445e-05,
|
| 720 |
+
"loss": 0.0,
|
| 721 |
+
"step": 4950
|
| 722 |
+
},
|
| 723 |
+
{
|
| 724 |
+
"epoch": 2.2222222222222223,
|
| 725 |
+
"grad_norm": 0.02587141841650009,
|
| 726 |
+
"learning_rate": 2.7782222222222228e-05,
|
| 727 |
+
"loss": 0.0,
|
| 728 |
+
"step": 5000
|
| 729 |
+
},
|
| 730 |
+
{
|
| 731 |
+
"epoch": 2.2444444444444445,
|
| 732 |
+
"grad_norm": 8.068871829891577e-05,
|
| 733 |
+
"learning_rate": 2.7560000000000004e-05,
|
| 734 |
+
"loss": 0.0,
|
| 735 |
+
"step": 5050
|
| 736 |
+
},
|
| 737 |
+
{
|
| 738 |
+
"epoch": 2.2666666666666666,
|
| 739 |
+
"grad_norm": 8.70975709403865e-05,
|
| 740 |
+
"learning_rate": 2.733777777777778e-05,
|
| 741 |
+
"loss": 0.0,
|
| 742 |
+
"step": 5100
|
| 743 |
+
},
|
| 744 |
+
{
|
| 745 |
+
"epoch": 2.2888888888888888,
|
| 746 |
+
"grad_norm": 8.906049333745614e-05,
|
| 747 |
+
"learning_rate": 2.7115555555555556e-05,
|
| 748 |
+
"loss": 0.0,
|
| 749 |
+
"step": 5150
|
| 750 |
+
},
|
| 751 |
+
{
|
| 752 |
+
"epoch": 2.311111111111111,
|
| 753 |
+
"grad_norm": 7.555448246421292e-05,
|
| 754 |
+
"learning_rate": 2.6893333333333336e-05,
|
| 755 |
+
"loss": 0.0,
|
| 756 |
+
"step": 5200
|
| 757 |
+
},
|
| 758 |
+
{
|
| 759 |
+
"epoch": 2.3333333333333335,
|
| 760 |
+
"grad_norm": 0.00033314700704067945,
|
| 761 |
+
"learning_rate": 2.6671111111111112e-05,
|
| 762 |
+
"loss": 0.0,
|
| 763 |
+
"step": 5250
|
| 764 |
+
},
|
| 765 |
+
{
|
| 766 |
+
"epoch": 2.3555555555555556,
|
| 767 |
+
"grad_norm": 8.106895256787539e-05,
|
| 768 |
+
"learning_rate": 2.644888888888889e-05,
|
| 769 |
+
"loss": 0.0,
|
| 770 |
+
"step": 5300
|
| 771 |
+
},
|
| 772 |
+
{
|
| 773 |
+
"epoch": 2.3777777777777778,
|
| 774 |
+
"grad_norm": 0.0002253134734928608,
|
| 775 |
+
"learning_rate": 2.6226666666666667e-05,
|
| 776 |
+
"loss": 0.0002,
|
| 777 |
+
"step": 5350
|
| 778 |
+
},
|
| 779 |
+
{
|
| 780 |
+
"epoch": 2.4,
|
| 781 |
+
"grad_norm": 0.04773925617337227,
|
| 782 |
+
"learning_rate": 2.6004444444444447e-05,
|
| 783 |
+
"loss": 0.0016,
|
| 784 |
+
"step": 5400
|
| 785 |
+
},
|
| 786 |
+
{
|
| 787 |
+
"epoch": 2.422222222222222,
|
| 788 |
+
"grad_norm": 0.013284893706440926,
|
| 789 |
+
"learning_rate": 2.5782222222222223e-05,
|
| 790 |
+
"loss": 0.0013,
|
| 791 |
+
"step": 5450
|
| 792 |
+
},
|
| 793 |
+
{
|
| 794 |
+
"epoch": 2.4444444444444446,
|
| 795 |
+
"grad_norm": 0.0019914733711630106,
|
| 796 |
+
"learning_rate": 2.556e-05,
|
| 797 |
+
"loss": 0.001,
|
| 798 |
+
"step": 5500
|
| 799 |
+
},
|
| 800 |
+
{
|
| 801 |
+
"epoch": 2.466666666666667,
|
| 802 |
+
"grad_norm": 0.00029465914121828973,
|
| 803 |
+
"learning_rate": 2.5337777777777775e-05,
|
| 804 |
+
"loss": 0.0001,
|
| 805 |
+
"step": 5550
|
| 806 |
+
},
|
| 807 |
+
{
|
| 808 |
+
"epoch": 2.488888888888889,
|
| 809 |
+
"grad_norm": 0.0010584425181150436,
|
| 810 |
+
"learning_rate": 2.5115555555555558e-05,
|
| 811 |
+
"loss": 0.0,
|
| 812 |
+
"step": 5600
|
| 813 |
+
},
|
| 814 |
+
{
|
| 815 |
+
"epoch": 2.511111111111111,
|
| 816 |
+
"grad_norm": 0.00024866798776201904,
|
| 817 |
+
"learning_rate": 2.4893333333333334e-05,
|
| 818 |
+
"loss": 0.0,
|
| 819 |
+
"step": 5650
|
| 820 |
+
},
|
| 821 |
+
{
|
| 822 |
+
"epoch": 2.533333333333333,
|
| 823 |
+
"grad_norm": 0.00015023746527731419,
|
| 824 |
+
"learning_rate": 2.467111111111111e-05,
|
| 825 |
+
"loss": 0.0,
|
| 826 |
+
"step": 5700
|
| 827 |
+
},
|
| 828 |
+
{
|
| 829 |
+
"epoch": 2.5555555555555554,
|
| 830 |
+
"grad_norm": 0.00020707614021375775,
|
| 831 |
+
"learning_rate": 2.444888888888889e-05,
|
| 832 |
+
"loss": 0.0,
|
| 833 |
+
"step": 5750
|
| 834 |
+
},
|
| 835 |
+
{
|
| 836 |
+
"epoch": 2.5777777777777775,
|
| 837 |
+
"grad_norm": 0.00013307879271451384,
|
| 838 |
+
"learning_rate": 2.4226666666666666e-05,
|
| 839 |
+
"loss": 0.0,
|
| 840 |
+
"step": 5800
|
| 841 |
+
},
|
| 842 |
+
{
|
| 843 |
+
"epoch": 2.6,
|
| 844 |
+
"grad_norm": 0.00012964503548573703,
|
| 845 |
+
"learning_rate": 2.4004444444444446e-05,
|
| 846 |
+
"loss": 0.0,
|
| 847 |
+
"step": 5850
|
| 848 |
+
},
|
| 849 |
+
{
|
| 850 |
+
"epoch": 2.6222222222222222,
|
| 851 |
+
"grad_norm": 0.00012642733054235578,
|
| 852 |
+
"learning_rate": 2.3782222222222222e-05,
|
| 853 |
+
"loss": 0.0,
|
| 854 |
+
"step": 5900
|
| 855 |
+
},
|
| 856 |
+
{
|
| 857 |
+
"epoch": 2.6444444444444444,
|
| 858 |
+
"grad_norm": 0.00172146107070148,
|
| 859 |
+
"learning_rate": 2.356e-05,
|
| 860 |
+
"loss": 0.0005,
|
| 861 |
+
"step": 5950
|
| 862 |
+
},
|
| 863 |
+
{
|
| 864 |
+
"epoch": 2.6666666666666665,
|
| 865 |
+
"grad_norm": 0.00067210040288046,
|
| 866 |
+
"learning_rate": 2.3337777777777778e-05,
|
| 867 |
+
"loss": 0.0003,
|
| 868 |
+
"step": 6000
|
| 869 |
+
},
|
| 870 |
+
{
|
| 871 |
+
"epoch": 2.688888888888889,
|
| 872 |
+
"grad_norm": 0.0019879883620887995,
|
| 873 |
+
"learning_rate": 2.3115555555555557e-05,
|
| 874 |
+
"loss": 0.0001,
|
| 875 |
+
"step": 6050
|
| 876 |
+
},
|
| 877 |
+
{
|
| 878 |
+
"epoch": 2.7111111111111112,
|
| 879 |
+
"grad_norm": 0.00017190357903018594,
|
| 880 |
+
"learning_rate": 2.2893333333333333e-05,
|
| 881 |
+
"loss": 0.0,
|
| 882 |
+
"step": 6100
|
| 883 |
+
},
|
| 884 |
+
{
|
| 885 |
+
"epoch": 2.7333333333333334,
|
| 886 |
+
"grad_norm": 0.00015691675071138889,
|
| 887 |
+
"learning_rate": 2.2671111111111113e-05,
|
| 888 |
+
"loss": 0.0,
|
| 889 |
+
"step": 6150
|
| 890 |
+
},
|
| 891 |
+
{
|
| 892 |
+
"epoch": 2.7555555555555555,
|
| 893 |
+
"grad_norm": 0.0001467355468776077,
|
| 894 |
+
"learning_rate": 2.244888888888889e-05,
|
| 895 |
+
"loss": 0.0,
|
| 896 |
+
"step": 6200
|
| 897 |
+
},
|
| 898 |
+
{
|
| 899 |
+
"epoch": 2.7777777777777777,
|
| 900 |
+
"grad_norm": 0.00015619705663993955,
|
| 901 |
+
"learning_rate": 2.222666666666667e-05,
|
| 902 |
+
"loss": 0.0,
|
| 903 |
+
"step": 6250
|
| 904 |
+
},
|
| 905 |
+
{
|
| 906 |
+
"epoch": 2.8,
|
| 907 |
+
"grad_norm": 0.0002816039486788213,
|
| 908 |
+
"learning_rate": 2.2004444444444445e-05,
|
| 909 |
+
"loss": 0.0,
|
| 910 |
+
"step": 6300
|
| 911 |
+
},
|
| 912 |
+
{
|
| 913 |
+
"epoch": 2.822222222222222,
|
| 914 |
+
"grad_norm": 0.0002559045678935945,
|
| 915 |
+
"learning_rate": 2.1782222222222224e-05,
|
| 916 |
+
"loss": 0.0,
|
| 917 |
+
"step": 6350
|
| 918 |
+
},
|
| 919 |
+
{
|
| 920 |
+
"epoch": 2.8444444444444446,
|
| 921 |
+
"grad_norm": 0.00017076350923161954,
|
| 922 |
+
"learning_rate": 2.1560000000000004e-05,
|
| 923 |
+
"loss": 0.0,
|
| 924 |
+
"step": 6400
|
| 925 |
+
},
|
| 926 |
+
{
|
| 927 |
+
"epoch": 2.8666666666666667,
|
| 928 |
+
"grad_norm": 9.32483744691126e-05,
|
| 929 |
+
"learning_rate": 2.133777777777778e-05,
|
| 930 |
+
"loss": 0.0,
|
| 931 |
+
"step": 6450
|
| 932 |
+
},
|
| 933 |
+
{
|
| 934 |
+
"epoch": 2.888888888888889,
|
| 935 |
+
"grad_norm": 9.684438555268571e-05,
|
| 936 |
+
"learning_rate": 2.111555555555556e-05,
|
| 937 |
+
"loss": 0.0,
|
| 938 |
+
"step": 6500
|
| 939 |
+
},
|
| 940 |
+
{
|
| 941 |
+
"epoch": 2.911111111111111,
|
| 942 |
+
"grad_norm": 0.00010544607357587665,
|
| 943 |
+
"learning_rate": 2.0893333333333335e-05,
|
| 944 |
+
"loss": 0.0,
|
| 945 |
+
"step": 6550
|
| 946 |
+
},
|
| 947 |
+
{
|
| 948 |
+
"epoch": 2.9333333333333336,
|
| 949 |
+
"grad_norm": 9.587592649040744e-05,
|
| 950 |
+
"learning_rate": 2.0671111111111115e-05,
|
| 951 |
+
"loss": 0.0,
|
| 952 |
+
"step": 6600
|
| 953 |
+
},
|
| 954 |
+
{
|
| 955 |
+
"epoch": 2.9555555555555557,
|
| 956 |
+
"grad_norm": 0.00025339287822134793,
|
| 957 |
+
"learning_rate": 2.044888888888889e-05,
|
| 958 |
+
"loss": 0.0,
|
| 959 |
+
"step": 6650
|
| 960 |
+
},
|
| 961 |
+
{
|
| 962 |
+
"epoch": 2.977777777777778,
|
| 963 |
+
"grad_norm": 8.56281112646684e-05,
|
| 964 |
+
"learning_rate": 2.0226666666666667e-05,
|
| 965 |
+
"loss": 0.0,
|
| 966 |
+
"step": 6700
|
| 967 |
+
},
|
| 968 |
+
{
|
| 969 |
+
"epoch": 3.0,
|
| 970 |
+
"grad_norm": 8.848596917232499e-05,
|
| 971 |
+
"learning_rate": 2.0004444444444447e-05,
|
| 972 |
+
"loss": 0.0,
|
| 973 |
+
"step": 6750
|
| 974 |
+
},
|
| 975 |
+
{
|
| 976 |
+
"epoch": 3.0,
|
| 977 |
+
"eval_loss": 9.946712680175551e-07,
|
| 978 |
+
"eval_report": " precision recall f1-score support\n\nCERTIFICATION 1.00 1.00 1.00 8666\n EDUCATION 1.00 1.00 1.00 10841\n LANGUAGE 1.00 1.00 1.00 4050\n SKILL 1.00 1.00 1.00 14410\n\n micro avg 1.00 1.00 1.00 37967\n macro avg 1.00 1.00 1.00 37967\n weighted avg 1.00 1.00 1.00 37967\n",
|
| 979 |
+
"eval_runtime": 39.3914,
|
| 980 |
+
"eval_samples_per_second": 50.773,
|
| 981 |
+
"eval_steps_per_second": 6.347,
|
| 982 |
+
"step": 6750
|
| 983 |
+
},
|
| 984 |
+
{
|
| 985 |
+
"epoch": 3.022222222222222,
|
| 986 |
+
"grad_norm": 7.696033571846783e-05,
|
| 987 |
+
"learning_rate": 1.9782222222222223e-05,
|
| 988 |
+
"loss": 0.0,
|
| 989 |
+
"step": 6800
|
| 990 |
+
},
|
| 991 |
+
{
|
| 992 |
+
"epoch": 3.0444444444444443,
|
| 993 |
+
"grad_norm": 6.949145608814433e-05,
|
| 994 |
+
"learning_rate": 1.956e-05,
|
| 995 |
+
"loss": 0.0,
|
| 996 |
+
"step": 6850
|
| 997 |
+
},
|
| 998 |
+
{
|
| 999 |
+
"epoch": 3.066666666666667,
|
| 1000 |
+
"grad_norm": 8.268443343695253e-05,
|
| 1001 |
+
"learning_rate": 1.933777777777778e-05,
|
| 1002 |
+
"loss": 0.0,
|
| 1003 |
+
"step": 6900
|
| 1004 |
+
},
|
| 1005 |
+
{
|
| 1006 |
+
"epoch": 3.088888888888889,
|
| 1007 |
+
"grad_norm": 7.342168828472495e-05,
|
| 1008 |
+
"learning_rate": 1.9115555555555555e-05,
|
| 1009 |
+
"loss": 0.0,
|
| 1010 |
+
"step": 6950
|
| 1011 |
+
},
|
| 1012 |
+
{
|
| 1013 |
+
"epoch": 3.111111111111111,
|
| 1014 |
+
"grad_norm": 6.869197386549786e-05,
|
| 1015 |
+
"learning_rate": 1.8893333333333334e-05,
|
| 1016 |
+
"loss": 0.0,
|
| 1017 |
+
"step": 7000
|
| 1018 |
+
},
|
| 1019 |
+
{
|
| 1020 |
+
"epoch": 3.1333333333333333,
|
| 1021 |
+
"grad_norm": 6.17999758105725e-05,
|
| 1022 |
+
"learning_rate": 1.867111111111111e-05,
|
| 1023 |
+
"loss": 0.0,
|
| 1024 |
+
"step": 7050
|
| 1025 |
+
},
|
| 1026 |
+
{
|
| 1027 |
+
"epoch": 3.1555555555555554,
|
| 1028 |
+
"grad_norm": 9.41220423555933e-05,
|
| 1029 |
+
"learning_rate": 1.844888888888889e-05,
|
| 1030 |
+
"loss": 0.0014,
|
| 1031 |
+
"step": 7100
|
| 1032 |
+
},
|
| 1033 |
+
{
|
| 1034 |
+
"epoch": 3.1777777777777776,
|
| 1035 |
+
"grad_norm": 0.00021578388987109065,
|
| 1036 |
+
"learning_rate": 1.8226666666666666e-05,
|
| 1037 |
+
"loss": 0.0001,
|
| 1038 |
+
"step": 7150
|
| 1039 |
+
},
|
| 1040 |
+
{
|
| 1041 |
+
"epoch": 3.2,
|
| 1042 |
+
"grad_norm": 0.0057373447343707085,
|
| 1043 |
+
"learning_rate": 1.8004444444444446e-05,
|
| 1044 |
+
"loss": 0.0002,
|
| 1045 |
+
"step": 7200
|
| 1046 |
+
},
|
| 1047 |
+
{
|
| 1048 |
+
"epoch": 3.2222222222222223,
|
| 1049 |
+
"grad_norm": 0.00015577590966131538,
|
| 1050 |
+
"learning_rate": 1.7782222222222222e-05,
|
| 1051 |
+
"loss": 0.0003,
|
| 1052 |
+
"step": 7250
|
| 1053 |
+
},
|
| 1054 |
+
{
|
| 1055 |
+
"epoch": 3.2444444444444445,
|
| 1056 |
+
"grad_norm": 0.0001663435687078163,
|
| 1057 |
+
"learning_rate": 1.756e-05,
|
| 1058 |
+
"loss": 0.0,
|
| 1059 |
+
"step": 7300
|
| 1060 |
+
},
|
| 1061 |
+
{
|
| 1062 |
+
"epoch": 3.2666666666666666,
|
| 1063 |
+
"grad_norm": 0.00011119741975562647,
|
| 1064 |
+
"learning_rate": 1.7337777777777777e-05,
|
| 1065 |
+
"loss": 0.0,
|
| 1066 |
+
"step": 7350
|
| 1067 |
+
},
|
| 1068 |
+
{
|
| 1069 |
+
"epoch": 3.2888888888888888,
|
| 1070 |
+
"grad_norm": 0.0001374996645608917,
|
| 1071 |
+
"learning_rate": 1.7115555555555557e-05,
|
| 1072 |
+
"loss": 0.0,
|
| 1073 |
+
"step": 7400
|
| 1074 |
+
},
|
| 1075 |
+
{
|
| 1076 |
+
"epoch": 3.311111111111111,
|
| 1077 |
+
"grad_norm": 0.00021004487643949687,
|
| 1078 |
+
"learning_rate": 1.6893333333333333e-05,
|
| 1079 |
+
"loss": 0.0,
|
| 1080 |
+
"step": 7450
|
| 1081 |
+
},
|
| 1082 |
+
{
|
| 1083 |
+
"epoch": 3.3333333333333335,
|
| 1084 |
+
"grad_norm": 0.0001712692028377205,
|
| 1085 |
+
"learning_rate": 1.6671111111111113e-05,
|
| 1086 |
+
"loss": 0.0,
|
| 1087 |
+
"step": 7500
|
| 1088 |
+
},
|
| 1089 |
+
{
|
| 1090 |
+
"epoch": 3.3555555555555556,
|
| 1091 |
+
"grad_norm": 9.832726209424436e-05,
|
| 1092 |
+
"learning_rate": 1.644888888888889e-05,
|
| 1093 |
+
"loss": 0.0,
|
| 1094 |
+
"step": 7550
|
| 1095 |
+
},
|
| 1096 |
+
{
|
| 1097 |
+
"epoch": 3.3777777777777778,
|
| 1098 |
+
"grad_norm": 9.489347576163709e-05,
|
| 1099 |
+
"learning_rate": 1.6226666666666668e-05,
|
| 1100 |
+
"loss": 0.0,
|
| 1101 |
+
"step": 7600
|
| 1102 |
+
},
|
| 1103 |
+
{
|
| 1104 |
+
"epoch": 3.4,
|
| 1105 |
+
"grad_norm": 9.42831247812137e-05,
|
| 1106 |
+
"learning_rate": 1.6004444444444444e-05,
|
| 1107 |
+
"loss": 0.0,
|
| 1108 |
+
"step": 7650
|
| 1109 |
+
},
|
| 1110 |
+
{
|
| 1111 |
+
"epoch": 3.422222222222222,
|
| 1112 |
+
"grad_norm": 0.0001237511314684525,
|
| 1113 |
+
"learning_rate": 1.5782222222222224e-05,
|
| 1114 |
+
"loss": 0.0,
|
| 1115 |
+
"step": 7700
|
| 1116 |
+
},
|
| 1117 |
+
{
|
| 1118 |
+
"epoch": 3.4444444444444446,
|
| 1119 |
+
"grad_norm": 6.492211105069146e-05,
|
| 1120 |
+
"learning_rate": 1.556e-05,
|
| 1121 |
+
"loss": 0.0,
|
| 1122 |
+
"step": 7750
|
| 1123 |
+
},
|
| 1124 |
+
{
|
| 1125 |
+
"epoch": 3.466666666666667,
|
| 1126 |
+
"grad_norm": 6.56497068121098e-05,
|
| 1127 |
+
"learning_rate": 1.533777777777778e-05,
|
| 1128 |
+
"loss": 0.0,
|
| 1129 |
+
"step": 7800
|
| 1130 |
+
},
|
| 1131 |
+
{
|
| 1132 |
+
"epoch": 3.488888888888889,
|
| 1133 |
+
"grad_norm": 6.696177297271788e-05,
|
| 1134 |
+
"learning_rate": 1.5115555555555556e-05,
|
| 1135 |
+
"loss": 0.0,
|
| 1136 |
+
"step": 7850
|
| 1137 |
+
},
|
| 1138 |
+
{
|
| 1139 |
+
"epoch": 3.511111111111111,
|
| 1140 |
+
"grad_norm": 5.779510684078559e-05,
|
| 1141 |
+
"learning_rate": 1.4893333333333334e-05,
|
| 1142 |
+
"loss": 0.0,
|
| 1143 |
+
"step": 7900
|
| 1144 |
+
},
|
| 1145 |
+
{
|
| 1146 |
+
"epoch": 3.533333333333333,
|
| 1147 |
+
"grad_norm": 6.52693779557012e-05,
|
| 1148 |
+
"learning_rate": 1.467111111111111e-05,
|
| 1149 |
+
"loss": 0.0,
|
| 1150 |
+
"step": 7950
|
| 1151 |
+
},
|
| 1152 |
+
{
|
| 1153 |
+
"epoch": 3.5555555555555554,
|
| 1154 |
+
"grad_norm": 9.678869537310675e-05,
|
| 1155 |
+
"learning_rate": 1.444888888888889e-05,
|
| 1156 |
+
"loss": 0.0,
|
| 1157 |
+
"step": 8000
|
| 1158 |
+
},
|
| 1159 |
+
{
|
| 1160 |
+
"epoch": 3.5777777777777775,
|
| 1161 |
+
"grad_norm": 5.08545599586796e-05,
|
| 1162 |
+
"learning_rate": 1.4226666666666669e-05,
|
| 1163 |
+
"loss": 0.0,
|
| 1164 |
+
"step": 8050
|
| 1165 |
+
},
|
| 1166 |
+
{
|
| 1167 |
+
"epoch": 3.6,
|
| 1168 |
+
"grad_norm": 0.0001278165145777166,
|
| 1169 |
+
"learning_rate": 1.4004444444444445e-05,
|
| 1170 |
+
"loss": 0.0,
|
| 1171 |
+
"step": 8100
|
| 1172 |
+
},
|
| 1173 |
+
{
|
| 1174 |
+
"epoch": 3.6222222222222222,
|
| 1175 |
+
"grad_norm": 5.6915894674602896e-05,
|
| 1176 |
+
"learning_rate": 1.3782222222222224e-05,
|
| 1177 |
+
"loss": 0.0,
|
| 1178 |
+
"step": 8150
|
| 1179 |
+
},
|
| 1180 |
+
{
|
| 1181 |
+
"epoch": 3.6444444444444444,
|
| 1182 |
+
"grad_norm": 5.536643220693804e-05,
|
| 1183 |
+
"learning_rate": 1.356e-05,
|
| 1184 |
+
"loss": 0.0,
|
| 1185 |
+
"step": 8200
|
| 1186 |
+
},
|
| 1187 |
+
{
|
| 1188 |
+
"epoch": 3.6666666666666665,
|
| 1189 |
+
"grad_norm": 6.497505819424987e-05,
|
| 1190 |
+
"learning_rate": 1.333777777777778e-05,
|
| 1191 |
+
"loss": 0.0,
|
| 1192 |
+
"step": 8250
|
| 1193 |
+
},
|
| 1194 |
+
{
|
| 1195 |
+
"epoch": 3.688888888888889,
|
| 1196 |
+
"grad_norm": 4.67045720142778e-05,
|
| 1197 |
+
"learning_rate": 1.3115555555555556e-05,
|
| 1198 |
+
"loss": 0.0,
|
| 1199 |
+
"step": 8300
|
| 1200 |
+
},
|
| 1201 |
+
{
|
| 1202 |
+
"epoch": 3.7111111111111112,
|
| 1203 |
+
"grad_norm": 5.038898962084204e-05,
|
| 1204 |
+
"learning_rate": 1.2893333333333336e-05,
|
| 1205 |
+
"loss": 0.0,
|
| 1206 |
+
"step": 8350
|
| 1207 |
+
},
|
| 1208 |
+
{
|
| 1209 |
+
"epoch": 3.7333333333333334,
|
| 1210 |
+
"grad_norm": 4.441513374331407e-05,
|
| 1211 |
+
"learning_rate": 1.2671111111111112e-05,
|
| 1212 |
+
"loss": 0.0,
|
| 1213 |
+
"step": 8400
|
| 1214 |
+
},
|
| 1215 |
+
{
|
| 1216 |
+
"epoch": 3.7555555555555555,
|
| 1217 |
+
"grad_norm": 5.228264126344584e-05,
|
| 1218 |
+
"learning_rate": 1.244888888888889e-05,
|
| 1219 |
+
"loss": 0.0,
|
| 1220 |
+
"step": 8450
|
| 1221 |
+
},
|
| 1222 |
+
{
|
| 1223 |
+
"epoch": 3.7777777777777777,
|
| 1224 |
+
"grad_norm": 4.654955773730762e-05,
|
| 1225 |
+
"learning_rate": 1.2226666666666668e-05,
|
| 1226 |
+
"loss": 0.0,
|
| 1227 |
+
"step": 8500
|
| 1228 |
+
},
|
| 1229 |
+
{
|
| 1230 |
+
"epoch": 3.8,
|
| 1231 |
+
"grad_norm": 5.188977957004681e-05,
|
| 1232 |
+
"learning_rate": 1.2004444444444444e-05,
|
| 1233 |
+
"loss": 0.0,
|
| 1234 |
+
"step": 8550
|
| 1235 |
+
},
|
| 1236 |
+
{
|
| 1237 |
+
"epoch": 3.822222222222222,
|
| 1238 |
+
"grad_norm": 0.002792242681607604,
|
| 1239 |
+
"learning_rate": 1.1782222222222222e-05,
|
| 1240 |
+
"loss": 0.0,
|
| 1241 |
+
"step": 8600
|
| 1242 |
+
},
|
| 1243 |
+
{
|
| 1244 |
+
"epoch": 3.8444444444444446,
|
| 1245 |
+
"grad_norm": 5.102707291371189e-05,
|
| 1246 |
+
"learning_rate": 1.156e-05,
|
| 1247 |
+
"loss": 0.0,
|
| 1248 |
+
"step": 8650
|
| 1249 |
+
},
|
| 1250 |
+
{
|
| 1251 |
+
"epoch": 3.8666666666666667,
|
| 1252 |
+
"grad_norm": 4.642659405362792e-05,
|
| 1253 |
+
"learning_rate": 1.1337777777777777e-05,
|
| 1254 |
+
"loss": 0.0,
|
| 1255 |
+
"step": 8700
|
| 1256 |
+
},
|
| 1257 |
+
{
|
| 1258 |
+
"epoch": 3.888888888888889,
|
| 1259 |
+
"grad_norm": 4.101167724002153e-05,
|
| 1260 |
+
"learning_rate": 1.1115555555555555e-05,
|
| 1261 |
+
"loss": 0.0,
|
| 1262 |
+
"step": 8750
|
| 1263 |
+
},
|
| 1264 |
+
{
|
| 1265 |
+
"epoch": 3.911111111111111,
|
| 1266 |
+
"grad_norm": 4.7815203288337216e-05,
|
| 1267 |
+
"learning_rate": 1.0893333333333333e-05,
|
| 1268 |
+
"loss": 0.0,
|
| 1269 |
+
"step": 8800
|
| 1270 |
+
},
|
| 1271 |
+
{
|
| 1272 |
+
"epoch": 3.9333333333333336,
|
| 1273 |
+
"grad_norm": 4.7473920858465135e-05,
|
| 1274 |
+
"learning_rate": 1.0671111111111112e-05,
|
| 1275 |
+
"loss": 0.0,
|
| 1276 |
+
"step": 8850
|
| 1277 |
+
},
|
| 1278 |
+
{
|
| 1279 |
+
"epoch": 3.9555555555555557,
|
| 1280 |
+
"grad_norm": 4.0860795706976205e-05,
|
| 1281 |
+
"learning_rate": 1.044888888888889e-05,
|
| 1282 |
+
"loss": 0.0,
|
| 1283 |
+
"step": 8900
|
| 1284 |
+
},
|
| 1285 |
+
{
|
| 1286 |
+
"epoch": 3.977777777777778,
|
| 1287 |
+
"grad_norm": 3.960240792366676e-05,
|
| 1288 |
+
"learning_rate": 1.0226666666666668e-05,
|
| 1289 |
+
"loss": 0.0,
|
| 1290 |
+
"step": 8950
|
| 1291 |
+
},
|
| 1292 |
+
{
|
| 1293 |
+
"epoch": 4.0,
|
| 1294 |
+
"grad_norm": 4.005260416306555e-05,
|
| 1295 |
+
"learning_rate": 1.0004444444444446e-05,
|
| 1296 |
+
"loss": 0.0,
|
| 1297 |
+
"step": 9000
|
| 1298 |
+
},
|
| 1299 |
+
{
|
| 1300 |
+
"epoch": 4.0,
|
| 1301 |
+
"eval_loss": 3.7440361211338313e-07,
|
| 1302 |
+
"eval_report": " precision recall f1-score support\n\nCERTIFICATION 1.00 1.00 1.00 8666\n EDUCATION 1.00 1.00 1.00 10841\n LANGUAGE 1.00 1.00 1.00 4050\n SKILL 1.00 1.00 1.00 14410\n\n micro avg 1.00 1.00 1.00 37967\n macro avg 1.00 1.00 1.00 37967\n weighted avg 1.00 1.00 1.00 37967\n",
|
| 1303 |
+
"eval_runtime": 39.2169,
|
| 1304 |
+
"eval_samples_per_second": 50.998,
|
| 1305 |
+
"eval_steps_per_second": 6.375,
|
| 1306 |
+
"step": 9000
|
| 1307 |
+
},
|
| 1308 |
+
{
|
| 1309 |
+
"epoch": 4.022222222222222,
|
| 1310 |
+
"grad_norm": 4.486538819037378e-05,
|
| 1311 |
+
"learning_rate": 9.782222222222224e-06,
|
| 1312 |
+
"loss": 0.0,
|
| 1313 |
+
"step": 9050
|
| 1314 |
+
},
|
| 1315 |
+
{
|
| 1316 |
+
"epoch": 4.044444444444444,
|
| 1317 |
+
"grad_norm": 4.269111741450615e-05,
|
| 1318 |
+
"learning_rate": 9.560000000000002e-06,
|
| 1319 |
+
"loss": 0.0,
|
| 1320 |
+
"step": 9100
|
| 1321 |
+
},
|
| 1322 |
+
{
|
| 1323 |
+
"epoch": 4.066666666666666,
|
| 1324 |
+
"grad_norm": 3.833163282251917e-05,
|
| 1325 |
+
"learning_rate": 9.337777777777778e-06,
|
| 1326 |
+
"loss": 0.0,
|
| 1327 |
+
"step": 9150
|
| 1328 |
+
},
|
| 1329 |
+
{
|
| 1330 |
+
"epoch": 4.088888888888889,
|
| 1331 |
+
"grad_norm": 5.278285243548453e-05,
|
| 1332 |
+
"learning_rate": 9.115555555555556e-06,
|
| 1333 |
+
"loss": 0.0,
|
| 1334 |
+
"step": 9200
|
| 1335 |
+
},
|
| 1336 |
+
{
|
| 1337 |
+
"epoch": 4.111111111111111,
|
| 1338 |
+
"grad_norm": 3.422492227400653e-05,
|
| 1339 |
+
"learning_rate": 8.893333333333333e-06,
|
| 1340 |
+
"loss": 0.0,
|
| 1341 |
+
"step": 9250
|
| 1342 |
+
},
|
| 1343 |
+
{
|
| 1344 |
+
"epoch": 4.133333333333334,
|
| 1345 |
+
"grad_norm": 3.486883360892534e-05,
|
| 1346 |
+
"learning_rate": 8.671111111111111e-06,
|
| 1347 |
+
"loss": 0.0,
|
| 1348 |
+
"step": 9300
|
| 1349 |
+
},
|
| 1350 |
+
{
|
| 1351 |
+
"epoch": 4.155555555555556,
|
| 1352 |
+
"grad_norm": 3.659820868051611e-05,
|
| 1353 |
+
"learning_rate": 8.448888888888889e-06,
|
| 1354 |
+
"loss": 0.0,
|
| 1355 |
+
"step": 9350
|
| 1356 |
+
},
|
| 1357 |
+
{
|
| 1358 |
+
"epoch": 4.177777777777778,
|
| 1359 |
+
"grad_norm": 3.242024467908777e-05,
|
| 1360 |
+
"learning_rate": 8.226666666666667e-06,
|
| 1361 |
+
"loss": 0.0,
|
| 1362 |
+
"step": 9400
|
| 1363 |
+
},
|
| 1364 |
+
{
|
| 1365 |
+
"epoch": 4.2,
|
| 1366 |
+
"grad_norm": 0.0001435010926797986,
|
| 1367 |
+
"learning_rate": 8.004444444444445e-06,
|
| 1368 |
+
"loss": 0.0,
|
| 1369 |
+
"step": 9450
|
| 1370 |
+
},
|
| 1371 |
+
{
|
| 1372 |
+
"epoch": 4.222222222222222,
|
| 1373 |
+
"grad_norm": 3.879176438204013e-05,
|
| 1374 |
+
"learning_rate": 7.782222222222223e-06,
|
| 1375 |
+
"loss": 0.0,
|
| 1376 |
+
"step": 9500
|
| 1377 |
+
},
|
| 1378 |
+
{
|
| 1379 |
+
"epoch": 4.2444444444444445,
|
| 1380 |
+
"grad_norm": 3.1664359994465485e-05,
|
| 1381 |
+
"learning_rate": 7.5600000000000005e-06,
|
| 1382 |
+
"loss": 0.0,
|
| 1383 |
+
"step": 9550
|
| 1384 |
+
},
|
| 1385 |
+
{
|
| 1386 |
+
"epoch": 4.266666666666667,
|
| 1387 |
+
"grad_norm": 3.302018012618646e-05,
|
| 1388 |
+
"learning_rate": 7.337777777777778e-06,
|
| 1389 |
+
"loss": 0.0,
|
| 1390 |
+
"step": 9600
|
| 1391 |
+
},
|
| 1392 |
+
{
|
| 1393 |
+
"epoch": 4.288888888888889,
|
| 1394 |
+
"grad_norm": 0.00034453265834599733,
|
| 1395 |
+
"learning_rate": 7.115555555555556e-06,
|
| 1396 |
+
"loss": 0.0,
|
| 1397 |
+
"step": 9650
|
| 1398 |
+
},
|
| 1399 |
+
{
|
| 1400 |
+
"epoch": 4.311111111111111,
|
| 1401 |
+
"grad_norm": 3.3918331610038877e-05,
|
| 1402 |
+
"learning_rate": 6.893333333333334e-06,
|
| 1403 |
+
"loss": 0.0,
|
| 1404 |
+
"step": 9700
|
| 1405 |
+
},
|
| 1406 |
+
{
|
| 1407 |
+
"epoch": 4.333333333333333,
|
| 1408 |
+
"grad_norm": 3.994768121629022e-05,
|
| 1409 |
+
"learning_rate": 6.671111111111111e-06,
|
| 1410 |
+
"loss": 0.0,
|
| 1411 |
+
"step": 9750
|
| 1412 |
+
},
|
| 1413 |
+
{
|
| 1414 |
+
"epoch": 4.355555555555555,
|
| 1415 |
+
"grad_norm": 7.467544492101297e-05,
|
| 1416 |
+
"learning_rate": 6.448888888888889e-06,
|
| 1417 |
+
"loss": 0.0,
|
| 1418 |
+
"step": 9800
|
| 1419 |
+
},
|
| 1420 |
+
{
|
| 1421 |
+
"epoch": 4.377777777777778,
|
| 1422 |
+
"grad_norm": 3.103712879237719e-05,
|
| 1423 |
+
"learning_rate": 6.226666666666667e-06,
|
| 1424 |
+
"loss": 0.0,
|
| 1425 |
+
"step": 9850
|
| 1426 |
+
},
|
| 1427 |
+
{
|
| 1428 |
+
"epoch": 4.4,
|
| 1429 |
+
"grad_norm": 4.751016604132019e-05,
|
| 1430 |
+
"learning_rate": 6.0044444444444445e-06,
|
| 1431 |
+
"loss": 0.0,
|
| 1432 |
+
"step": 9900
|
| 1433 |
+
},
|
| 1434 |
+
{
|
| 1435 |
+
"epoch": 4.4222222222222225,
|
| 1436 |
+
"grad_norm": 9.309023153036833e-05,
|
| 1437 |
+
"learning_rate": 5.782222222222222e-06,
|
| 1438 |
+
"loss": 0.0,
|
| 1439 |
+
"step": 9950
|
| 1440 |
+
},
|
| 1441 |
+
{
|
| 1442 |
+
"epoch": 4.444444444444445,
|
| 1443 |
+
"grad_norm": 3.0390472602448426e-05,
|
| 1444 |
+
"learning_rate": 5.56e-06,
|
| 1445 |
+
"loss": 0.0,
|
| 1446 |
+
"step": 10000
|
| 1447 |
+
},
|
| 1448 |
+
{
|
| 1449 |
+
"epoch": 4.466666666666667,
|
| 1450 |
+
"grad_norm": 3.085477146669291e-05,
|
| 1451 |
+
"learning_rate": 5.337777777777778e-06,
|
| 1452 |
+
"loss": 0.0,
|
| 1453 |
+
"step": 10050
|
| 1454 |
+
},
|
| 1455 |
+
{
|
| 1456 |
+
"epoch": 4.488888888888889,
|
| 1457 |
+
"grad_norm": 3.4081851481460035e-05,
|
| 1458 |
+
"learning_rate": 5.115555555555556e-06,
|
| 1459 |
+
"loss": 0.0,
|
| 1460 |
+
"step": 10100
|
| 1461 |
+
},
|
| 1462 |
+
{
|
| 1463 |
+
"epoch": 4.511111111111111,
|
| 1464 |
+
"grad_norm": 3.805098822340369e-05,
|
| 1465 |
+
"learning_rate": 4.893333333333334e-06,
|
| 1466 |
+
"loss": 0.0,
|
| 1467 |
+
"step": 10150
|
| 1468 |
+
},
|
| 1469 |
+
{
|
| 1470 |
+
"epoch": 4.533333333333333,
|
| 1471 |
+
"grad_norm": 3.679243309306912e-05,
|
| 1472 |
+
"learning_rate": 4.6711111111111115e-06,
|
| 1473 |
+
"loss": 0.0,
|
| 1474 |
+
"step": 10200
|
| 1475 |
+
},
|
| 1476 |
+
{
|
| 1477 |
+
"epoch": 4.555555555555555,
|
| 1478 |
+
"grad_norm": 3.425304385018535e-05,
|
| 1479 |
+
"learning_rate": 4.448888888888889e-06,
|
| 1480 |
+
"loss": 0.0,
|
| 1481 |
+
"step": 10250
|
| 1482 |
+
},
|
| 1483 |
+
{
|
| 1484 |
+
"epoch": 4.5777777777777775,
|
| 1485 |
+
"grad_norm": 2.751518513832707e-05,
|
| 1486 |
+
"learning_rate": 4.226666666666667e-06,
|
| 1487 |
+
"loss": 0.0,
|
| 1488 |
+
"step": 10300
|
| 1489 |
+
},
|
| 1490 |
+
{
|
| 1491 |
+
"epoch": 4.6,
|
| 1492 |
+
"grad_norm": 2.8532382202683948e-05,
|
| 1493 |
+
"learning_rate": 4.004444444444445e-06,
|
| 1494 |
+
"loss": 0.0,
|
| 1495 |
+
"step": 10350
|
| 1496 |
+
},
|
| 1497 |
+
{
|
| 1498 |
+
"epoch": 4.622222222222222,
|
| 1499 |
+
"grad_norm": 2.9013817766099237e-05,
|
| 1500 |
+
"learning_rate": 3.7822222222222224e-06,
|
| 1501 |
+
"loss": 0.0,
|
| 1502 |
+
"step": 10400
|
| 1503 |
+
},
|
| 1504 |
+
{
|
| 1505 |
+
"epoch": 4.644444444444445,
|
| 1506 |
+
"grad_norm": 3.07061527564656e-05,
|
| 1507 |
+
"learning_rate": 3.5600000000000002e-06,
|
| 1508 |
+
"loss": 0.0,
|
| 1509 |
+
"step": 10450
|
| 1510 |
+
},
|
| 1511 |
+
{
|
| 1512 |
+
"epoch": 4.666666666666667,
|
| 1513 |
+
"grad_norm": 2.944734660559334e-05,
|
| 1514 |
+
"learning_rate": 3.337777777777778e-06,
|
| 1515 |
+
"loss": 0.0,
|
| 1516 |
+
"step": 10500
|
| 1517 |
+
},
|
| 1518 |
+
{
|
| 1519 |
+
"epoch": 4.688888888888889,
|
| 1520 |
+
"grad_norm": 2.6814725060830824e-05,
|
| 1521 |
+
"learning_rate": 3.1155555555555555e-06,
|
| 1522 |
+
"loss": 0.0,
|
| 1523 |
+
"step": 10550
|
| 1524 |
+
},
|
| 1525 |
+
{
|
| 1526 |
+
"epoch": 4.711111111111111,
|
| 1527 |
+
"grad_norm": 3.8631915231235325e-05,
|
| 1528 |
+
"learning_rate": 2.8933333333333333e-06,
|
| 1529 |
+
"loss": 0.0,
|
| 1530 |
+
"step": 10600
|
| 1531 |
+
},
|
| 1532 |
+
{
|
| 1533 |
+
"epoch": 4.733333333333333,
|
| 1534 |
+
"grad_norm": 4.212988278595731e-05,
|
| 1535 |
+
"learning_rate": 2.6711111111111116e-06,
|
| 1536 |
+
"loss": 0.0,
|
| 1537 |
+
"step": 10650
|
| 1538 |
+
},
|
| 1539 |
+
{
|
| 1540 |
+
"epoch": 4.7555555555555555,
|
| 1541 |
+
"grad_norm": 2.7735224648495205e-05,
|
| 1542 |
+
"learning_rate": 2.448888888888889e-06,
|
| 1543 |
+
"loss": 0.0,
|
| 1544 |
+
"step": 10700
|
| 1545 |
+
},
|
| 1546 |
+
{
|
| 1547 |
+
"epoch": 4.777777777777778,
|
| 1548 |
+
"grad_norm": 3.2194642699323595e-05,
|
| 1549 |
+
"learning_rate": 2.226666666666667e-06,
|
| 1550 |
+
"loss": 0.0,
|
| 1551 |
+
"step": 10750
|
| 1552 |
+
},
|
| 1553 |
+
{
|
| 1554 |
+
"epoch": 4.8,
|
| 1555 |
+
"grad_norm": 2.7552601750358008e-05,
|
| 1556 |
+
"learning_rate": 2.0044444444444446e-06,
|
| 1557 |
+
"loss": 0.0,
|
| 1558 |
+
"step": 10800
|
| 1559 |
+
},
|
| 1560 |
+
{
|
| 1561 |
+
"epoch": 4.822222222222222,
|
| 1562 |
+
"grad_norm": 3.464681503828615e-05,
|
| 1563 |
+
"learning_rate": 1.7822222222222223e-06,
|
| 1564 |
+
"loss": 0.0,
|
| 1565 |
+
"step": 10850
|
| 1566 |
+
},
|
| 1567 |
+
{
|
| 1568 |
+
"epoch": 4.844444444444444,
|
| 1569 |
+
"grad_norm": 2.8678237868007272e-05,
|
| 1570 |
+
"learning_rate": 1.56e-06,
|
| 1571 |
+
"loss": 0.0,
|
| 1572 |
+
"step": 10900
|
| 1573 |
+
},
|
| 1574 |
+
{
|
| 1575 |
+
"epoch": 4.866666666666667,
|
| 1576 |
+
"grad_norm": 2.6989377147401683e-05,
|
| 1577 |
+
"learning_rate": 1.337777777777778e-06,
|
| 1578 |
+
"loss": 0.0,
|
| 1579 |
+
"step": 10950
|
| 1580 |
+
},
|
| 1581 |
+
{
|
| 1582 |
+
"epoch": 4.888888888888889,
|
| 1583 |
+
"grad_norm": 2.9060267479508184e-05,
|
| 1584 |
+
"learning_rate": 1.1155555555555556e-06,
|
| 1585 |
+
"loss": 0.0,
|
| 1586 |
+
"step": 11000
|
| 1587 |
+
},
|
| 1588 |
+
{
|
| 1589 |
+
"epoch": 4.911111111111111,
|
| 1590 |
+
"grad_norm": 4.428656757227145e-05,
|
| 1591 |
+
"learning_rate": 8.933333333333334e-07,
|
| 1592 |
+
"loss": 0.0,
|
| 1593 |
+
"step": 11050
|
| 1594 |
+
},
|
| 1595 |
+
{
|
| 1596 |
+
"epoch": 4.933333333333334,
|
| 1597 |
+
"grad_norm": 2.6587131287669763e-05,
|
| 1598 |
+
"learning_rate": 6.711111111111111e-07,
|
| 1599 |
+
"loss": 0.0,
|
| 1600 |
+
"step": 11100
|
| 1601 |
+
},
|
| 1602 |
+
{
|
| 1603 |
+
"epoch": 4.955555555555556,
|
| 1604 |
+
"grad_norm": 3.082263719988987e-05,
|
| 1605 |
+
"learning_rate": 4.488888888888889e-07,
|
| 1606 |
+
"loss": 0.0,
|
| 1607 |
+
"step": 11150
|
| 1608 |
+
},
|
| 1609 |
+
{
|
| 1610 |
+
"epoch": 4.977777777777778,
|
| 1611 |
+
"grad_norm": 3.0073399102548137e-05,
|
| 1612 |
+
"learning_rate": 2.2666666666666668e-07,
|
| 1613 |
+
"loss": 0.0,
|
| 1614 |
+
"step": 11200
|
| 1615 |
+
},
|
| 1616 |
+
{
|
| 1617 |
+
"epoch": 5.0,
|
| 1618 |
+
"grad_norm": 2.95465397357475e-05,
|
| 1619 |
+
"learning_rate": 4.444444444444445e-09,
|
| 1620 |
+
"loss": 0.0,
|
| 1621 |
+
"step": 11250
|
| 1622 |
+
}
|
| 1623 |
+
],
|
| 1624 |
+
"logging_steps": 50,
|
| 1625 |
+
"max_steps": 11250,
|
| 1626 |
+
"num_input_tokens_seen": 0,
|
| 1627 |
+
"num_train_epochs": 5,
|
| 1628 |
+
"save_steps": 500,
|
| 1629 |
+
"stateful_callbacks": {
|
| 1630 |
+
"TrainerControl": {
|
| 1631 |
+
"args": {
|
| 1632 |
+
"should_epoch_stop": false,
|
| 1633 |
+
"should_evaluate": false,
|
| 1634 |
+
"should_log": false,
|
| 1635 |
+
"should_save": true,
|
| 1636 |
+
"should_training_stop": true
|
| 1637 |
+
},
|
| 1638 |
+
"attributes": {}
|
| 1639 |
+
}
|
| 1640 |
+
},
|
| 1641 |
+
"total_flos": 1.0362271779289728e+16,
|
| 1642 |
+
"train_batch_size": 8,
|
| 1643 |
+
"trial_name": null,
|
| 1644 |
+
"trial_params": null
|
| 1645 |
+
}
|
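The trainer_state.json shown above keeps the full log_history: one entry every 50 optimizer steps (loss, learning rate, gradient norm) plus an evaluation entry at each epoch boundary. A minimal sketch, assuming the checkpoint folder has been downloaded locally (the path below is a placeholder), for pulling those evaluation entries back out:

import json

# Placeholder path: point it at the downloaded checkpoint folder.
with open("checkpoint-11250/trainer_state.json") as f:
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_loss"
# plus the classification report stored as a plain string.
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f"epoch {entry['epoch']:.0f}  step {entry['step']}  eval_loss {entry['eval_loss']:.2e}")
        print(entry["eval_report"])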
checkpoint-11250/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2437d3569274a4022188190da6f34872c3e1e86886e244e0bd97efb2e6b5384c
|
| 3 |
+
size 5713
|
config.json
CHANGED
|
@@ -10,28 +10,28 @@
|
|
| 10 |
"hidden_dropout_prob": 0.1,
|
| 11 |
"hidden_size": 768,
|
| 12 |
"id2label": {
|
| 13 |
-
"0": "LABEL_0",
|
| 14 |
-
"1": "LABEL_1",
|
| 15 |
-
"2": "LABEL_2",
|
| 16 |
-
"3": "LABEL_3",
|
| 17 |
-
"4": "LABEL_4",
|
| 18 |
-
"5": "LABEL_5",
|
| 19 |
-
"6": "LABEL_6",
|
| 20 |
-
"7": "LABEL_7",
|
| 21 |
-
"8": "LABEL_8"
|
| 22 |
},
|
| 23 |
"initializer_range": 0.02,
|
| 24 |
"intermediate_size": 3072,
|
| 25 |
"label2id": {
|
| 26 |
-
"LABEL_0": 0,
|
| 27 |
-
"LABEL_1": 1,
|
| 28 |
-
"LABEL_2": 2,
|
| 29 |
-
"LABEL_3": 3,
|
| 30 |
-
"LABEL_4": 4,
|
| 31 |
-
"LABEL_5": 5,
|
| 32 |
-
"LABEL_6": 6,
|
| 33 |
-
"LABEL_7": 7,
|
| 34 |
-
"LABEL_8": 8
|
| 35 |
},
|
| 36 |
"layer_norm_eps": 1e-07,
|
| 37 |
"legacy": true,
|
|
|
|
| 10 |
"hidden_dropout_prob": 0.1,
|
| 11 |
"hidden_size": 768,
|
| 12 |
"id2label": {
|
| 13 |
+
"0": "O",
|
| 14 |
+
"1": "B-EDUCATION",
|
| 15 |
+
"2": "I-EDUCATION",
|
| 16 |
+
"3": "B-SKILL",
|
| 17 |
+
"4": "I-SKILL",
|
| 18 |
+
"5": "B-CERTIFICATION",
|
| 19 |
+
"6": "I-CERTIFICATION",
|
| 20 |
+
"7": "B-LANGUAGE",
|
| 21 |
+
"8": "I-LANGUAGE"
|
| 22 |
},
|
| 23 |
"initializer_range": 0.02,
|
| 24 |
"intermediate_size": 3072,
|
| 25 |
"label2id": {
|
| 26 |
+
"B-CERTIFICATION": 5,
|
| 27 |
+
"B-EDUCATION": 1,
|
| 28 |
+
"B-LANGUAGE": 7,
|
| 29 |
+
"B-SKILL": 3,
|
| 30 |
+
"I-CERTIFICATION": 6,
|
| 31 |
+
"I-EDUCATION": 2,
|
| 32 |
+
"I-LANGUAGE": 8,
|
| 33 |
+
"I-SKILL": 4,
|
| 34 |
+
"O": 0
|
| 35 |
},
|
| 36 |
"layer_norm_eps": 1e-07,
|
| 37 |
"legacy": true,
|
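The only substantive change to the top-level config.json is this label mapping: the placeholder LABEL_0..LABEL_8 names are replaced by BIO tags for EDUCATION, SKILL, CERTIFICATION and LANGUAGE. A minimal usage sketch; the repo id below is a stand-in for wherever this upload actually lives:

from transformers import pipeline

# "your-username/resume-ner" is a placeholder repo id, not the actual model name.
ner = pipeline(
    "token-classification",
    model="your-username/resume-ner",
    aggregation_strategy="simple",  # merge B-/I- pieces into whole entity spans
)

text = "MSc in Computer Science, fluent in Spanish, AWS Certified Solutions Architect, Python."
for ent in ner(text):
    print(ent["entity_group"], ent["word"], round(float(ent["score"]), 3))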
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5713
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2437d3569274a4022188190da6f34872c3e1e86886e244e0bd97efb2e6b5384c
|
| 3 |
size 5713
|