Shrav20 committed on Sep 14, 2025

Commit

1211eca

verified ·

1 Parent(s): 8ddc087

Upload folder using huggingface_hub

Browse files

Files changed (29) hide show

checkpoint-5000/added_tokens.json +3 -0
checkpoint-5000/config.json +59 -0
checkpoint-5000/model.safetensors +3 -0
checkpoint-5000/optimizer.pt +3 -0
checkpoint-5000/rng_state.pth +3 -0
checkpoint-5000/scaler.pt +3 -0
checkpoint-5000/scheduler.pt +3 -0
checkpoint-5000/special_tokens_map.json +15 -0
checkpoint-5000/spm.model +3 -0
checkpoint-5000/tokenizer.json +0 -0
checkpoint-5000/tokenizer_config.json +59 -0
checkpoint-5000/trainer_state.json +752 -0
checkpoint-5000/training_args.bin +3 -0
checkpoint-5064/added_tokens.json +3 -0
checkpoint-5064/config.json +59 -0
checkpoint-5064/model.safetensors +3 -0
checkpoint-5064/optimizer.pt +3 -0
checkpoint-5064/rng_state.pth +3 -0
checkpoint-5064/scaler.pt +3 -0
checkpoint-5064/scheduler.pt +3 -0
checkpoint-5064/special_tokens_map.json +15 -0
checkpoint-5064/spm.model +3 -0
checkpoint-5064/tokenizer.json +0 -0
checkpoint-5064/tokenizer_config.json +59 -0
checkpoint-5064/trainer_state.json +759 -0
checkpoint-5064/training_args.bin +3 -0
config.json +18 -18
tokenizer.json +1 -6
training_args.bin +3 -0

checkpoint-5000/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[MASK]": 128000
+}

checkpoint-5000/config.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "architectures": [
+    "DebertaV2ForTokenClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 1,
+  "dtype": "float32",
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6",
+    "7": "LABEL_7",
+    "8": "LABEL_8"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6,
+    "LABEL_7": 7,
+    "LABEL_8": 8
+  },
+  "layer_norm_eps": 1e-07,
+  "legacy": true,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 768,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "transformers_version": "4.56.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}

checkpoint-5000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:29caa67a5a5d3c257059d7bf9719ae6a49fbd154be64ef7866c62aaf36b681ec
+size 735378268

checkpoint-5000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:621dc4d4656d0480d1a56778748ba2b4b22c61d167927a05a38e4a241a203c56
+size 1470878283

checkpoint-5000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b48dfd571d881f4a8068c767e2be8fb52102ed0832c2de99c399a418d2d658c6
+size 14645

checkpoint-5000/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:203b45607d9b09c4083e736a94ab0083b09046bdeb2eb3cf626d7fe59682b08f
+size 1383

checkpoint-5000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ee65c2046475aac0172642791112a7b39d987d84e2ba816f08aa22687a5e9d9
+size 1465

checkpoint-5000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-5000/spm.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+size 2464616

checkpoint-5000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-5000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128000": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}

checkpoint-5000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,752 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.962085308056872,
+  "eval_steps": 500,
+  "global_step": 5000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02962085308056872,
+      "grad_norm": 0.0359644778072834,
+      "learning_rate": 4.951619273301738e-05,
+      "loss": 0.3566,
+      "step": 50
+    },
+    {
+      "epoch": 0.05924170616113744,
+      "grad_norm": 0.045561712235212326,
+      "learning_rate": 4.902251184834124e-05,
+      "loss": 0.0019,
+      "step": 100
+    },
+    {
+      "epoch": 0.08886255924170616,
+      "grad_norm": 0.011206220835447311,
+      "learning_rate": 4.852883096366509e-05,
+      "loss": 0.0017,
+      "step": 150
+    },
+    {
+      "epoch": 0.11848341232227488,
+      "grad_norm": 0.008807787671685219,
+      "learning_rate": 4.8035150078988947e-05,
+      "loss": 0.0006,
+      "step": 200
+    },
+    {
+      "epoch": 0.1481042654028436,
+      "grad_norm": 0.005796543322503567,
+      "learning_rate": 4.75414691943128e-05,
+      "loss": 0.0009,
+      "step": 250
+    },
+    {
+      "epoch": 0.17772511848341233,
+      "grad_norm": 0.005227432586252689,
+      "learning_rate": 4.7047788309636656e-05,
+      "loss": 0.0006,
+      "step": 300
+    },
+    {
+      "epoch": 0.20734597156398105,
+      "grad_norm": 0.003554289462044835,
+      "learning_rate": 4.655410742496051e-05,
+      "loss": 0.0005,
+      "step": 350
+    },
+    {
+      "epoch": 0.23696682464454977,
+      "grad_norm": 0.0033741986844688654,
+      "learning_rate": 4.6060426540284365e-05,
+      "loss": 0.0012,
+      "step": 400
+    },
+    {
+      "epoch": 0.2665876777251185,
+      "grad_norm": 0.002832002704963088,
+      "learning_rate": 4.556674565560822e-05,
+      "loss": 0.0002,
+      "step": 450
+    },
+    {
+      "epoch": 0.2962085308056872,
+      "grad_norm": 0.002275377744808793,
+      "learning_rate": 4.5073064770932074e-05,
+      "loss": 0.0007,
+      "step": 500
+    },
+    {
+      "epoch": 0.32582938388625593,
+      "grad_norm": 0.003911971114575863,
+      "learning_rate": 4.457938388625593e-05,
+      "loss": 0.0006,
+      "step": 550
+    },
+    {
+      "epoch": 0.35545023696682465,
+      "grad_norm": 0.006926015485078096,
+      "learning_rate": 4.408570300157978e-05,
+      "loss": 0.0014,
+      "step": 600
+    },
+    {
+      "epoch": 0.38507109004739337,
+      "grad_norm": 0.0027338722720742226,
+      "learning_rate": 4.359202211690364e-05,
+      "loss": 0.0008,
+      "step": 650
+    },
+    {
+      "epoch": 0.4146919431279621,
+      "grad_norm": 0.001827694708481431,
+      "learning_rate": 4.309834123222749e-05,
+      "loss": 0.0003,
+      "step": 700
+    },
+    {
+      "epoch": 0.4443127962085308,
+      "grad_norm": 0.0012653936864808202,
+      "learning_rate": 4.2604660347551346e-05,
+      "loss": 0.0001,
+      "step": 750
+    },
+    {
+      "epoch": 0.47393364928909953,
+      "grad_norm": 0.0022165332920849323,
+      "learning_rate": 4.21109794628752e-05,
+      "loss": 0.0004,
+      "step": 800
+    },
+    {
+      "epoch": 0.5035545023696683,
+      "grad_norm": 0.0063213687390089035,
+      "learning_rate": 4.1617298578199055e-05,
+      "loss": 0.0007,
+      "step": 850
+    },
+    {
+      "epoch": 0.533175355450237,
+      "grad_norm": 0.024515969678759575,
+      "learning_rate": 4.112361769352291e-05,
+      "loss": 0.001,
+      "step": 900
+    },
+    {
+      "epoch": 0.5627962085308057,
+      "grad_norm": 0.003376233857125044,
+      "learning_rate": 4.0629936808846765e-05,
+      "loss": 0.0005,
+      "step": 950
+    },
+    {
+      "epoch": 0.5924170616113744,
+      "grad_norm": 0.001513678696937859,
+      "learning_rate": 4.013625592417062e-05,
+      "loss": 0.0002,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6220379146919431,
+      "grad_norm": 0.013340185396373272,
+      "learning_rate": 3.9642575039494474e-05,
+      "loss": 0.0004,
+      "step": 1050
+    },
+    {
+      "epoch": 0.6516587677725119,
+      "grad_norm": 0.0008107981411740184,
+      "learning_rate": 3.914889415481833e-05,
+      "loss": 0.0001,
+      "step": 1100
+    },
+    {
+      "epoch": 0.6812796208530806,
+      "grad_norm": 0.000826548261102289,
+      "learning_rate": 3.865521327014218e-05,
+      "loss": 0.0,
+      "step": 1150
+    },
+    {
+      "epoch": 0.7109004739336493,
+      "grad_norm": 0.0015754875494167209,
+      "learning_rate": 3.816153238546604e-05,
+      "loss": 0.0003,
+      "step": 1200
+    },
+    {
+      "epoch": 0.740521327014218,
+      "grad_norm": 0.0010875407606363297,
+      "learning_rate": 3.766785150078989e-05,
+      "loss": 0.0001,
+      "step": 1250
+    },
+    {
+      "epoch": 0.7701421800947867,
+      "grad_norm": 0.0013485795352607965,
+      "learning_rate": 3.7174170616113746e-05,
+      "loss": 0.0002,
+      "step": 1300
+    },
+    {
+      "epoch": 0.7997630331753555,
+      "grad_norm": 0.002195018110796809,
+      "learning_rate": 3.66804897314376e-05,
+      "loss": 0.0008,
+      "step": 1350
+    },
+    {
+      "epoch": 0.8293838862559242,
+      "grad_norm": 0.0013226654846221209,
+      "learning_rate": 3.6186808846761455e-05,
+      "loss": 0.0004,
+      "step": 1400
+    },
+    {
+      "epoch": 0.8590047393364929,
+      "grad_norm": 0.020256407558918,
+      "learning_rate": 3.569312796208531e-05,
+      "loss": 0.0011,
+      "step": 1450
+    },
+    {
+      "epoch": 0.8886255924170616,
+      "grad_norm": 0.0060112737119197845,
+      "learning_rate": 3.5199447077409164e-05,
+      "loss": 0.0003,
+      "step": 1500
+    },
+    {
+      "epoch": 0.9182464454976303,
+      "grad_norm": 0.0010298583656549454,
+      "learning_rate": 3.470576619273302e-05,
+      "loss": 0.0002,
+      "step": 1550
+    },
+    {
+      "epoch": 0.9478672985781991,
+      "grad_norm": 0.0008804717799648643,
+      "learning_rate": 3.4212085308056873e-05,
+      "loss": 0.0003,
+      "step": 1600
+    },
+    {
+      "epoch": 0.9774881516587678,
+      "grad_norm": 0.0007368926890194416,
+      "learning_rate": 3.371840442338073e-05,
+      "loss": 0.0002,
+      "step": 1650
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 6.839788693469018e-05,
+      "eval_report": "               precision    recall  f1-score   support\n\nCERTIFICATION       1.00      1.00      1.00      1410\n    EDUCATION       1.00      1.00      1.00      2241\n     LANGUAGE       1.00      1.00      1.00      3014\n        SKILL       1.00      1.00      1.00      3069\n\n    micro avg       1.00      1.00      1.00      9734\n    macro avg       1.00      1.00      1.00      9734\n weighted avg       1.00      1.00      1.00      9734\n",
+      "eval_runtime": 7.1833,
+      "eval_samples_per_second": 208.818,
+      "eval_steps_per_second": 26.172,
+      "step": 1688
+    },
+    {
+      "epoch": 1.0071090047393365,
+      "grad_norm": 0.0006786159938201308,
+      "learning_rate": 3.322472353870458e-05,
+      "loss": 0.0002,
+      "step": 1700
+    },
+    {
+      "epoch": 1.0367298578199051,
+      "grad_norm": 0.005513947457075119,
+      "learning_rate": 3.273104265402844e-05,
+      "loss": 0.0001,
+      "step": 1750
+    },
+    {
+      "epoch": 1.066350710900474,
+      "grad_norm": 0.000574503734242171,
+      "learning_rate": 3.223736176935229e-05,
+      "loss": 0.0001,
+      "step": 1800
+    },
+    {
+      "epoch": 1.0959715639810426,
+      "grad_norm": 0.004586311522871256,
+      "learning_rate": 3.1743680884676146e-05,
+      "loss": 0.0006,
+      "step": 1850
+    },
+    {
+      "epoch": 1.1255924170616114,
+      "grad_norm": 0.000738324539270252,
+      "learning_rate": 3.125e-05,
+      "loss": 0.0002,
+      "step": 1900
+    },
+    {
+      "epoch": 1.15521327014218,
+      "grad_norm": 0.0006399003323167562,
+      "learning_rate": 3.0756319115323855e-05,
+      "loss": 0.0001,
+      "step": 1950
+    },
+    {
+      "epoch": 1.1848341232227488,
+      "grad_norm": 0.0004596344952005893,
+      "learning_rate": 3.026263823064771e-05,
+      "loss": 0.0,
+      "step": 2000
+    },
+    {
+      "epoch": 1.2144549763033174,
+      "grad_norm": 0.0004429569817148149,
+      "learning_rate": 2.9768957345971564e-05,
+      "loss": 0.0001,
+      "step": 2050
+    },
+    {
+      "epoch": 1.2440758293838863,
+      "grad_norm": 0.0004196607042104006,
+      "learning_rate": 2.927527646129542e-05,
+      "loss": 0.0,
+      "step": 2100
+    },
+    {
+      "epoch": 1.2736966824644549,
+      "grad_norm": 0.0004485426179599017,
+      "learning_rate": 2.8781595576619273e-05,
+      "loss": 0.0,
+      "step": 2150
+    },
+    {
+      "epoch": 1.3033175355450237,
+      "grad_norm": 0.0005169134237803519,
+      "learning_rate": 2.8287914691943128e-05,
+      "loss": 0.0001,
+      "step": 2200
+    },
+    {
+      "epoch": 1.3329383886255926,
+      "grad_norm": 0.0005291880224831402,
+      "learning_rate": 2.7794233807266982e-05,
+      "loss": 0.0002,
+      "step": 2250
+    },
+    {
+      "epoch": 1.3625592417061612,
+      "grad_norm": 0.00048425907152704895,
+      "learning_rate": 2.7300552922590837e-05,
+      "loss": 0.0001,
+      "step": 2300
+    },
+    {
+      "epoch": 1.3921800947867298,
+      "grad_norm": 0.0004068867419846356,
+      "learning_rate": 2.6806872037914695e-05,
+      "loss": 0.0,
+      "step": 2350
+    },
+    {
+      "epoch": 1.4218009478672986,
+      "grad_norm": 0.0003516751166898757,
+      "learning_rate": 2.631319115323855e-05,
+      "loss": 0.0,
+      "step": 2400
+    },
+    {
+      "epoch": 1.4514218009478674,
+      "grad_norm": 0.0003432795056141913,
+      "learning_rate": 2.5819510268562404e-05,
+      "loss": 0.0,
+      "step": 2450
+    },
+    {
+      "epoch": 1.481042654028436,
+      "grad_norm": 0.00032765124342404306,
+      "learning_rate": 2.532582938388626e-05,
+      "loss": 0.0,
+      "step": 2500
+    },
+    {
+      "epoch": 1.5106635071090047,
+      "grad_norm": 0.00030620096367783844,
+      "learning_rate": 2.4832148499210113e-05,
+      "loss": 0.0,
+      "step": 2550
+    },
+    {
+      "epoch": 1.5402843601895735,
+      "grad_norm": 0.00032276054844260216,
+      "learning_rate": 2.4338467614533967e-05,
+      "loss": 0.0,
+      "step": 2600
+    },
+    {
+      "epoch": 1.5699052132701423,
+      "grad_norm": 0.0003284791891928762,
+      "learning_rate": 2.3844786729857822e-05,
+      "loss": 0.0,
+      "step": 2650
+    },
+    {
+      "epoch": 1.599526066350711,
+      "grad_norm": 0.0005517126410268247,
+      "learning_rate": 2.3351105845181677e-05,
+      "loss": 0.0001,
+      "step": 2700
+    },
+    {
+      "epoch": 1.6291469194312795,
+      "grad_norm": 0.0004335689009167254,
+      "learning_rate": 2.285742496050553e-05,
+      "loss": 0.0,
+      "step": 2750
+    },
+    {
+      "epoch": 1.6587677725118484,
+      "grad_norm": 0.00938709732145071,
+      "learning_rate": 2.2363744075829386e-05,
+      "loss": 0.0011,
+      "step": 2800
+    },
+    {
+      "epoch": 1.6883886255924172,
+      "grad_norm": 0.002625273773446679,
+      "learning_rate": 2.187006319115324e-05,
+      "loss": 0.0,
+      "step": 2850
+    },
+    {
+      "epoch": 1.7180094786729858,
+      "grad_norm": 0.00048340365174226463,
+      "learning_rate": 2.1376382306477095e-05,
+      "loss": 0.0,
+      "step": 2900
+    },
+    {
+      "epoch": 1.7476303317535544,
+      "grad_norm": 0.0035711589735001326,
+      "learning_rate": 2.088270142180095e-05,
+      "loss": 0.0005,
+      "step": 2950
+    },
+    {
+      "epoch": 1.7772511848341233,
+      "grad_norm": 0.0005380721995607018,
+      "learning_rate": 2.0389020537124804e-05,
+      "loss": 0.0001,
+      "step": 3000
+    },
+    {
+      "epoch": 1.806872037914692,
+      "grad_norm": 0.00039379362715408206,
+      "learning_rate": 1.9895339652448658e-05,
+      "loss": 0.0,
+      "step": 3050
+    },
+    {
+      "epoch": 1.8364928909952607,
+      "grad_norm": 0.00031137277255766094,
+      "learning_rate": 1.9401658767772513e-05,
+      "loss": 0.0001,
+      "step": 3100
+    },
+    {
+      "epoch": 1.8661137440758293,
+      "grad_norm": 0.00033859844552353024,
+      "learning_rate": 1.8907977883096367e-05,
+      "loss": 0.0003,
+      "step": 3150
+    },
+    {
+      "epoch": 1.8957345971563981,
+      "grad_norm": 0.0003177137696184218,
+      "learning_rate": 1.8414296998420222e-05,
+      "loss": 0.0,
+      "step": 3200
+    },
+    {
+      "epoch": 1.925355450236967,
+      "grad_norm": 0.5252463817596436,
+      "learning_rate": 1.7920616113744076e-05,
+      "loss": 0.0001,
+      "step": 3250
+    },
+    {
+      "epoch": 1.9549763033175356,
+      "grad_norm": 0.000578847888391465,
+      "learning_rate": 1.742693522906793e-05,
+      "loss": 0.0002,
+      "step": 3300
+    },
+    {
+      "epoch": 1.9845971563981042,
+      "grad_norm": 0.00031036767177283764,
+      "learning_rate": 1.6933254344391785e-05,
+      "loss": 0.0,
+      "step": 3350
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.0002486561133991927,
+      "eval_report": "               precision    recall  f1-score   support\n\nCERTIFICATION       1.00      1.00      1.00      1410\n    EDUCATION       1.00      1.00      1.00      2241\n     LANGUAGE       1.00      1.00      1.00      3014\n        SKILL       1.00      1.00      1.00      3069\n\n    micro avg       1.00      1.00      1.00      9734\n    macro avg       1.00      1.00      1.00      9734\n weighted avg       1.00      1.00      1.00      9734\n",
+      "eval_runtime": 7.0179,
+      "eval_samples_per_second": 213.738,
+      "eval_steps_per_second": 26.788,
+      "step": 3376
+    },
+    {
+      "epoch": 2.014218009478673,
+      "grad_norm": 0.00038110482273623347,
+      "learning_rate": 1.643957345971564e-05,
+      "loss": 0.0002,
+      "step": 3400
+    },
+    {
+      "epoch": 2.043838862559242,
+      "grad_norm": 0.00030082205194048584,
+      "learning_rate": 1.5945892575039495e-05,
+      "loss": 0.0,
+      "step": 3450
+    },
+    {
+      "epoch": 2.0734597156398102,
+      "grad_norm": 0.00030870226328261197,
+      "learning_rate": 1.545221169036335e-05,
+      "loss": 0.0,
+      "step": 3500
+    },
+    {
+      "epoch": 2.103080568720379,
+      "grad_norm": 0.00023404941020999104,
+      "learning_rate": 1.4958530805687204e-05,
+      "loss": 0.0001,
+      "step": 3550
+    },
+    {
+      "epoch": 2.132701421800948,
+      "grad_norm": 0.00021994848793838173,
+      "learning_rate": 1.4464849921011058e-05,
+      "loss": 0.0,
+      "step": 3600
+    },
+    {
+      "epoch": 2.1623222748815167,
+      "grad_norm": 0.0002600239240564406,
+      "learning_rate": 1.3971169036334913e-05,
+      "loss": 0.0,
+      "step": 3650
+    },
+    {
+      "epoch": 2.191943127962085,
+      "grad_norm": 0.00019250177138019353,
+      "learning_rate": 1.3477488151658769e-05,
+      "loss": 0.0,
+      "step": 3700
+    },
+    {
+      "epoch": 2.221563981042654,
+      "grad_norm": 0.00024143581686075777,
+      "learning_rate": 1.2983807266982623e-05,
+      "loss": 0.0,
+      "step": 3750
+    },
+    {
+      "epoch": 2.251184834123223,
+      "grad_norm": 0.00020565264276228845,
+      "learning_rate": 1.2490126382306478e-05,
+      "loss": 0.0,
+      "step": 3800
+    },
+    {
+      "epoch": 2.2808056872037916,
+      "grad_norm": 0.0002789797727018595,
+      "learning_rate": 1.1996445497630332e-05,
+      "loss": 0.0002,
+      "step": 3850
+    },
+    {
+      "epoch": 2.31042654028436,
+      "grad_norm": 0.0007835368160158396,
+      "learning_rate": 1.1502764612954187e-05,
+      "loss": 0.0007,
+      "step": 3900
+    },
+    {
+      "epoch": 2.340047393364929,
+      "grad_norm": 0.0004727982450276613,
+      "learning_rate": 1.1009083728278042e-05,
+      "loss": 0.0,
+      "step": 3950
+    },
+    {
+      "epoch": 2.3696682464454977,
+      "grad_norm": 0.0008073291974142194,
+      "learning_rate": 1.0515402843601896e-05,
+      "loss": 0.0,
+      "step": 4000
+    },
+    {
+      "epoch": 2.3992890995260665,
+      "grad_norm": 0.15108434855937958,
+      "learning_rate": 1.002172195892575e-05,
+      "loss": 0.0,
+      "step": 4050
+    },
+    {
+      "epoch": 2.428909952606635,
+      "grad_norm": 0.00033067440381273627,
+      "learning_rate": 9.528041074249605e-06,
+      "loss": 0.0,
+      "step": 4100
+    },
+    {
+      "epoch": 2.4585308056872037,
+      "grad_norm": 0.019667765125632286,
+      "learning_rate": 9.03436018957346e-06,
+      "loss": 0.0,
+      "step": 4150
+    },
+    {
+      "epoch": 2.4881516587677726,
+      "grad_norm": 0.000259611289948225,
+      "learning_rate": 8.540679304897314e-06,
+      "loss": 0.0,
+      "step": 4200
+    },
+    {
+      "epoch": 2.5177725118483414,
+      "grad_norm": 0.0002708205720409751,
+      "learning_rate": 8.046998420221169e-06,
+      "loss": 0.0003,
+      "step": 4250
+    },
+    {
+      "epoch": 2.5473933649289098,
+      "grad_norm": 0.0002382330858381465,
+      "learning_rate": 7.553317535545023e-06,
+      "loss": 0.0,
+      "step": 4300
+    },
+    {
+      "epoch": 2.5770142180094786,
+      "grad_norm": 0.0002609147340990603,
+      "learning_rate": 7.059636650868879e-06,
+      "loss": 0.0,
+      "step": 4350
+    },
+    {
+      "epoch": 2.6066350710900474,
+      "grad_norm": 0.00025002885377034545,
+      "learning_rate": 6.565955766192733e-06,
+      "loss": 0.0,
+      "step": 4400
+    },
+    {
+      "epoch": 2.6362559241706163,
+      "grad_norm": 0.00023832859005779028,
+      "learning_rate": 6.0722748815165886e-06,
+      "loss": 0.0,
+      "step": 4450
+    },
+    {
+      "epoch": 2.665876777251185,
+      "grad_norm": 0.0002076889795716852,
+      "learning_rate": 5.578593996840443e-06,
+      "loss": 0.0,
+      "step": 4500
+    },
+    {
+      "epoch": 2.6954976303317535,
+      "grad_norm": 0.0002478805836290121,
+      "learning_rate": 5.084913112164298e-06,
+      "loss": 0.0,
+      "step": 4550
+    },
+    {
+      "epoch": 2.7251184834123223,
+      "grad_norm": 0.00021155517606530339,
+      "learning_rate": 4.591232227488152e-06,
+      "loss": 0.0,
+      "step": 4600
+    },
+    {
+      "epoch": 2.754739336492891,
+      "grad_norm": 0.00021754855697508901,
+      "learning_rate": 4.097551342812007e-06,
+      "loss": 0.0,
+      "step": 4650
+    },
+    {
+      "epoch": 2.7843601895734595,
+      "grad_norm": 0.00020183408923912793,
+      "learning_rate": 3.6038704581358612e-06,
+      "loss": 0.0,
+      "step": 4700
+    },
+    {
+      "epoch": 2.8139810426540284,
+      "grad_norm": 0.00022289449407253414,
+      "learning_rate": 3.1101895734597158e-06,
+      "loss": 0.0,
+      "step": 4750
+    },
+    {
+      "epoch": 2.843601895734597,
+      "grad_norm": 0.007142237853258848,
+      "learning_rate": 2.6165086887835703e-06,
+      "loss": 0.0,
+      "step": 4800
+    },
+    {
+      "epoch": 2.873222748815166,
+      "grad_norm": 0.00019705097656697035,
+      "learning_rate": 2.122827804107425e-06,
+      "loss": 0.0,
+      "step": 4850
+    },
+    {
+      "epoch": 2.902843601895735,
+      "grad_norm": 0.00023452220193576068,
+      "learning_rate": 1.6291469194312798e-06,
+      "loss": 0.0001,
+      "step": 4900
+    },
+    {
+      "epoch": 2.9324644549763033,
+      "grad_norm": 0.000205856587854214,
+      "learning_rate": 1.1354660347551343e-06,
+      "loss": 0.0,
+      "step": 4950
+    },
+    {
+      "epoch": 2.962085308056872,
+      "grad_norm": 0.00020924191630911082,
+      "learning_rate": 6.41785150078989e-07,
+      "loss": 0.0,
+      "step": 5000
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 5064,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1246789065733344.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-5000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:42eacd77743b7a41ad451c542aa07b5056fe7c7f890f7ab016bc148113f71f09
+size 5713

checkpoint-5064/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[MASK]": 128000
+}

checkpoint-5064/config.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "architectures": [
+    "DebertaV2ForTokenClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 1,
+  "dtype": "float32",
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6",
+    "7": "LABEL_7",
+    "8": "LABEL_8"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6,
+    "LABEL_7": 7,
+    "LABEL_8": 8
+  },
+  "layer_norm_eps": 1e-07,
+  "legacy": true,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 768,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "transformers_version": "4.56.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}

checkpoint-5064/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f29cefcbe4ebeab55797f5c52bcf7591e39877beed3a9ce01bde796777ba29a1
+size 735378268

checkpoint-5064/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:74a7035f474cc1cd56a5299ffc0cee20d9a1f579af144133f05cf3469fd5d967
+size 1470878283

checkpoint-5064/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e4237637ed2a909ac55fe085c627ab99fc039d3c6aa9183495fa775e1f00ba6
+size 14645

checkpoint-5064/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ac8bf92429ffb94154ae048d975d1ba548e2690d1bb98a4ccbc355bf5d60771f
+size 1383

checkpoint-5064/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a12cd4840f3c33f044bc06ba15cb1faedfb4c5136e557bf713cd04e8c9bf14b
+size 1465

checkpoint-5064/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-5064/spm.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+size 2464616

checkpoint-5064/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-5064/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128000": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}

checkpoint-5064/trainer_state.json ADDED Viewed

	@@ -0,0 +1,759 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 5064,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02962085308056872,
+      "grad_norm": 0.0359644778072834,
+      "learning_rate": 4.951619273301738e-05,
+      "loss": 0.3566,
+      "step": 50
+    },
+    {
+      "epoch": 0.05924170616113744,
+      "grad_norm": 0.045561712235212326,
+      "learning_rate": 4.902251184834124e-05,
+      "loss": 0.0019,
+      "step": 100
+    },
+    {
+      "epoch": 0.08886255924170616,
+      "grad_norm": 0.011206220835447311,
+      "learning_rate": 4.852883096366509e-05,
+      "loss": 0.0017,
+      "step": 150
+    },
+    {
+      "epoch": 0.11848341232227488,
+      "grad_norm": 0.008807787671685219,
+      "learning_rate": 4.8035150078988947e-05,
+      "loss": 0.0006,
+      "step": 200
+    },
+    {
+      "epoch": 0.1481042654028436,
+      "grad_norm": 0.005796543322503567,
+      "learning_rate": 4.75414691943128e-05,
+      "loss": 0.0009,
+      "step": 250
+    },
+    {
+      "epoch": 0.17772511848341233,
+      "grad_norm": 0.005227432586252689,
+      "learning_rate": 4.7047788309636656e-05,
+      "loss": 0.0006,
+      "step": 300
+    },
+    {
+      "epoch": 0.20734597156398105,
+      "grad_norm": 0.003554289462044835,
+      "learning_rate": 4.655410742496051e-05,
+      "loss": 0.0005,
+      "step": 350
+    },
+    {
+      "epoch": 0.23696682464454977,
+      "grad_norm": 0.0033741986844688654,
+      "learning_rate": 4.6060426540284365e-05,
+      "loss": 0.0012,
+      "step": 400
+    },
+    {
+      "epoch": 0.2665876777251185,
+      "grad_norm": 0.002832002704963088,
+      "learning_rate": 4.556674565560822e-05,
+      "loss": 0.0002,
+      "step": 450
+    },
+    {
+      "epoch": 0.2962085308056872,
+      "grad_norm": 0.002275377744808793,
+      "learning_rate": 4.5073064770932074e-05,
+      "loss": 0.0007,
+      "step": 500
+    },
+    {
+      "epoch": 0.32582938388625593,
+      "grad_norm": 0.003911971114575863,
+      "learning_rate": 4.457938388625593e-05,
+      "loss": 0.0006,
+      "step": 550
+    },
+    {
+      "epoch": 0.35545023696682465,
+      "grad_norm": 0.006926015485078096,
+      "learning_rate": 4.408570300157978e-05,
+      "loss": 0.0014,
+      "step": 600
+    },
+    {
+      "epoch": 0.38507109004739337,
+      "grad_norm": 0.0027338722720742226,
+      "learning_rate": 4.359202211690364e-05,
+      "loss": 0.0008,
+      "step": 650
+    },
+    {
+      "epoch": 0.4146919431279621,
+      "grad_norm": 0.001827694708481431,
+      "learning_rate": 4.309834123222749e-05,
+      "loss": 0.0003,
+      "step": 700
+    },
+    {
+      "epoch": 0.4443127962085308,
+      "grad_norm": 0.0012653936864808202,
+      "learning_rate": 4.2604660347551346e-05,
+      "loss": 0.0001,
+      "step": 750
+    },
+    {
+      "epoch": 0.47393364928909953,
+      "grad_norm": 0.0022165332920849323,
+      "learning_rate": 4.21109794628752e-05,
+      "loss": 0.0004,
+      "step": 800
+    },
+    {
+      "epoch": 0.5035545023696683,
+      "grad_norm": 0.0063213687390089035,
+      "learning_rate": 4.1617298578199055e-05,
+      "loss": 0.0007,
+      "step": 850
+    },
+    {
+      "epoch": 0.533175355450237,
+      "grad_norm": 0.024515969678759575,
+      "learning_rate": 4.112361769352291e-05,
+      "loss": 0.001,
+      "step": 900
+    },
+    {
+      "epoch": 0.5627962085308057,
+      "grad_norm": 0.003376233857125044,
+      "learning_rate": 4.0629936808846765e-05,
+      "loss": 0.0005,
+      "step": 950
+    },
+    {
+      "epoch": 0.5924170616113744,
+      "grad_norm": 0.001513678696937859,
+      "learning_rate": 4.013625592417062e-05,
+      "loss": 0.0002,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6220379146919431,
+      "grad_norm": 0.013340185396373272,
+      "learning_rate": 3.9642575039494474e-05,
+      "loss": 0.0004,
+      "step": 1050
+    },
+    {
+      "epoch": 0.6516587677725119,
+      "grad_norm": 0.0008107981411740184,
+      "learning_rate": 3.914889415481833e-05,
+      "loss": 0.0001,
+      "step": 1100
+    },
+    {
+      "epoch": 0.6812796208530806,
+      "grad_norm": 0.000826548261102289,
+      "learning_rate": 3.865521327014218e-05,
+      "loss": 0.0,
+      "step": 1150
+    },
+    {
+      "epoch": 0.7109004739336493,
+      "grad_norm": 0.0015754875494167209,
+      "learning_rate": 3.816153238546604e-05,
+      "loss": 0.0003,
+      "step": 1200
+    },
+    {
+      "epoch": 0.740521327014218,
+      "grad_norm": 0.0010875407606363297,
+      "learning_rate": 3.766785150078989e-05,
+      "loss": 0.0001,
+      "step": 1250
+    },
+    {
+      "epoch": 0.7701421800947867,
+      "grad_norm": 0.0013485795352607965,
+      "learning_rate": 3.7174170616113746e-05,
+      "loss": 0.0002,
+      "step": 1300
+    },
+    {
+      "epoch": 0.7997630331753555,
+      "grad_norm": 0.002195018110796809,
+      "learning_rate": 3.66804897314376e-05,
+      "loss": 0.0008,
+      "step": 1350
+    },
+    {
+      "epoch": 0.8293838862559242,
+      "grad_norm": 0.0013226654846221209,
+      "learning_rate": 3.6186808846761455e-05,
+      "loss": 0.0004,
+      "step": 1400
+    },
+    {
+      "epoch": 0.8590047393364929,
+      "grad_norm": 0.020256407558918,
+      "learning_rate": 3.569312796208531e-05,
+      "loss": 0.0011,
+      "step": 1450
+    },
+    {
+      "epoch": 0.8886255924170616,
+      "grad_norm": 0.0060112737119197845,
+      "learning_rate": 3.5199447077409164e-05,
+      "loss": 0.0003,
+      "step": 1500
+    },
+    {
+      "epoch": 0.9182464454976303,
+      "grad_norm": 0.0010298583656549454,
+      "learning_rate": 3.470576619273302e-05,
+      "loss": 0.0002,
+      "step": 1550
+    },
+    {
+      "epoch": 0.9478672985781991,
+      "grad_norm": 0.0008804717799648643,
+      "learning_rate": 3.4212085308056873e-05,
+      "loss": 0.0003,
+      "step": 1600
+    },
+    {
+      "epoch": 0.9774881516587678,
+      "grad_norm": 0.0007368926890194416,
+      "learning_rate": 3.371840442338073e-05,
+      "loss": 0.0002,
+      "step": 1650
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 6.839788693469018e-05,
+      "eval_report": "               precision    recall  f1-score   support\n\nCERTIFICATION       1.00      1.00      1.00      1410\n    EDUCATION       1.00      1.00      1.00      2241\n     LANGUAGE       1.00      1.00      1.00      3014\n        SKILL       1.00      1.00      1.00      3069\n\n    micro avg       1.00      1.00      1.00      9734\n    macro avg       1.00      1.00      1.00      9734\n weighted avg       1.00      1.00      1.00      9734\n",
+      "eval_runtime": 7.1833,
+      "eval_samples_per_second": 208.818,
+      "eval_steps_per_second": 26.172,
+      "step": 1688
+    },
+    {
+      "epoch": 1.0071090047393365,
+      "grad_norm": 0.0006786159938201308,
+      "learning_rate": 3.322472353870458e-05,
+      "loss": 0.0002,
+      "step": 1700
+    },
+    {
+      "epoch": 1.0367298578199051,
+      "grad_norm": 0.005513947457075119,
+      "learning_rate": 3.273104265402844e-05,
+      "loss": 0.0001,
+      "step": 1750
+    },
+    {
+      "epoch": 1.066350710900474,
+      "grad_norm": 0.000574503734242171,
+      "learning_rate": 3.223736176935229e-05,
+      "loss": 0.0001,
+      "step": 1800
+    },
+    {
+      "epoch": 1.0959715639810426,
+      "grad_norm": 0.004586311522871256,
+      "learning_rate": 3.1743680884676146e-05,
+      "loss": 0.0006,
+      "step": 1850
+    },
+    {
+      "epoch": 1.1255924170616114,
+      "grad_norm": 0.000738324539270252,
+      "learning_rate": 3.125e-05,
+      "loss": 0.0002,
+      "step": 1900
+    },
+    {
+      "epoch": 1.15521327014218,
+      "grad_norm": 0.0006399003323167562,
+      "learning_rate": 3.0756319115323855e-05,
+      "loss": 0.0001,
+      "step": 1950
+    },
+    {
+      "epoch": 1.1848341232227488,
+      "grad_norm": 0.0004596344952005893,
+      "learning_rate": 3.026263823064771e-05,
+      "loss": 0.0,
+      "step": 2000
+    },
+    {
+      "epoch": 1.2144549763033174,
+      "grad_norm": 0.0004429569817148149,
+      "learning_rate": 2.9768957345971564e-05,
+      "loss": 0.0001,
+      "step": 2050
+    },
+    {
+      "epoch": 1.2440758293838863,
+      "grad_norm": 0.0004196607042104006,
+      "learning_rate": 2.927527646129542e-05,
+      "loss": 0.0,
+      "step": 2100
+    },
+    {
+      "epoch": 1.2736966824644549,
+      "grad_norm": 0.0004485426179599017,
+      "learning_rate": 2.8781595576619273e-05,
+      "loss": 0.0,
+      "step": 2150
+    },
+    {
+      "epoch": 1.3033175355450237,
+      "grad_norm": 0.0005169134237803519,
+      "learning_rate": 2.8287914691943128e-05,
+      "loss": 0.0001,
+      "step": 2200
+    },
+    {
+      "epoch": 1.3329383886255926,
+      "grad_norm": 0.0005291880224831402,
+      "learning_rate": 2.7794233807266982e-05,
+      "loss": 0.0002,
+      "step": 2250
+    },
+    {
+      "epoch": 1.3625592417061612,
+      "grad_norm": 0.00048425907152704895,
+      "learning_rate": 2.7300552922590837e-05,
+      "loss": 0.0001,
+      "step": 2300
+    },
+    {
+      "epoch": 1.3921800947867298,
+      "grad_norm": 0.0004068867419846356,
+      "learning_rate": 2.6806872037914695e-05,
+      "loss": 0.0,
+      "step": 2350
+    },
+    {
+      "epoch": 1.4218009478672986,
+      "grad_norm": 0.0003516751166898757,
+      "learning_rate": 2.631319115323855e-05,
+      "loss": 0.0,
+      "step": 2400
+    },
+    {
+      "epoch": 1.4514218009478674,
+      "grad_norm": 0.0003432795056141913,
+      "learning_rate": 2.5819510268562404e-05,
+      "loss": 0.0,
+      "step": 2450
+    },
+    {
+      "epoch": 1.481042654028436,
+      "grad_norm": 0.00032765124342404306,
+      "learning_rate": 2.532582938388626e-05,
+      "loss": 0.0,
+      "step": 2500
+    },
+    {
+      "epoch": 1.5106635071090047,
+      "grad_norm": 0.00030620096367783844,
+      "learning_rate": 2.4832148499210113e-05,
+      "loss": 0.0,
+      "step": 2550
+    },
+    {
+      "epoch": 1.5402843601895735,
+      "grad_norm": 0.00032276054844260216,
+      "learning_rate": 2.4338467614533967e-05,
+      "loss": 0.0,
+      "step": 2600
+    },
+    {
+      "epoch": 1.5699052132701423,
+      "grad_norm": 0.0003284791891928762,
+      "learning_rate": 2.3844786729857822e-05,
+      "loss": 0.0,
+      "step": 2650
+    },
+    {
+      "epoch": 1.599526066350711,
+      "grad_norm": 0.0005517126410268247,
+      "learning_rate": 2.3351105845181677e-05,
+      "loss": 0.0001,
+      "step": 2700
+    },
+    {
+      "epoch": 1.6291469194312795,
+      "grad_norm": 0.0004335689009167254,
+      "learning_rate": 2.285742496050553e-05,
+      "loss": 0.0,
+      "step": 2750
+    },
+    {
+      "epoch": 1.6587677725118484,
+      "grad_norm": 0.00938709732145071,
+      "learning_rate": 2.2363744075829386e-05,
+      "loss": 0.0011,
+      "step": 2800
+    },
+    {
+      "epoch": 1.6883886255924172,
+      "grad_norm": 0.002625273773446679,
+      "learning_rate": 2.187006319115324e-05,
+      "loss": 0.0,
+      "step": 2850
+    },
+    {
+      "epoch": 1.7180094786729858,
+      "grad_norm": 0.00048340365174226463,
+      "learning_rate": 2.1376382306477095e-05,
+      "loss": 0.0,
+      "step": 2900
+    },
+    {
+      "epoch": 1.7476303317535544,
+      "grad_norm": 0.0035711589735001326,
+      "learning_rate": 2.088270142180095e-05,
+      "loss": 0.0005,
+      "step": 2950
+    },
+    {
+      "epoch": 1.7772511848341233,
+      "grad_norm": 0.0005380721995607018,
+      "learning_rate": 2.0389020537124804e-05,
+      "loss": 0.0001,
+      "step": 3000
+    },
+    {
+      "epoch": 1.806872037914692,
+      "grad_norm": 0.00039379362715408206,
+      "learning_rate": 1.9895339652448658e-05,
+      "loss": 0.0,
+      "step": 3050
+    },
+    {
+      "epoch": 1.8364928909952607,
+      "grad_norm": 0.00031137277255766094,
+      "learning_rate": 1.9401658767772513e-05,
+      "loss": 0.0001,
+      "step": 3100
+    },
+    {
+      "epoch": 1.8661137440758293,
+      "grad_norm": 0.00033859844552353024,
+      "learning_rate": 1.8907977883096367e-05,
+      "loss": 0.0003,
+      "step": 3150
+    },
+    {
+      "epoch": 1.8957345971563981,
+      "grad_norm": 0.0003177137696184218,
+      "learning_rate": 1.8414296998420222e-05,
+      "loss": 0.0,
+      "step": 3200
+    },
+    {
+      "epoch": 1.925355450236967,
+      "grad_norm": 0.5252463817596436,
+      "learning_rate": 1.7920616113744076e-05,
+      "loss": 0.0001,
+      "step": 3250
+    },
+    {
+      "epoch": 1.9549763033175356,
+      "grad_norm": 0.000578847888391465,
+      "learning_rate": 1.742693522906793e-05,
+      "loss": 0.0002,
+      "step": 3300
+    },
+    {
+      "epoch": 1.9845971563981042,
+      "grad_norm": 0.00031036767177283764,
+      "learning_rate": 1.6933254344391785e-05,
+      "loss": 0.0,
+      "step": 3350
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.0002486561133991927,
+      "eval_report": "               precision    recall  f1-score   support\n\nCERTIFICATION       1.00      1.00      1.00      1410\n    EDUCATION       1.00      1.00      1.00      2241\n     LANGUAGE       1.00      1.00      1.00      3014\n        SKILL       1.00      1.00      1.00      3069\n\n    micro avg       1.00      1.00      1.00      9734\n    macro avg       1.00      1.00      1.00      9734\n weighted avg       1.00      1.00      1.00      9734\n",
+      "eval_runtime": 7.0179,
+      "eval_samples_per_second": 213.738,
+      "eval_steps_per_second": 26.788,
+      "step": 3376
+    },
+    {
+      "epoch": 2.014218009478673,
+      "grad_norm": 0.00038110482273623347,
+      "learning_rate": 1.643957345971564e-05,
+      "loss": 0.0002,
+      "step": 3400
+    },
+    {
+      "epoch": 2.043838862559242,
+      "grad_norm": 0.00030082205194048584,
+      "learning_rate": 1.5945892575039495e-05,
+      "loss": 0.0,
+      "step": 3450
+    },
+    {
+      "epoch": 2.0734597156398102,
+      "grad_norm": 0.00030870226328261197,
+      "learning_rate": 1.545221169036335e-05,
+      "loss": 0.0,
+      "step": 3500
+    },
+    {
+      "epoch": 2.103080568720379,
+      "grad_norm": 0.00023404941020999104,
+      "learning_rate": 1.4958530805687204e-05,
+      "loss": 0.0001,
+      "step": 3550
+    },
+    {
+      "epoch": 2.132701421800948,
+      "grad_norm": 0.00021994848793838173,
+      "learning_rate": 1.4464849921011058e-05,
+      "loss": 0.0,
+      "step": 3600
+    },
+    {
+      "epoch": 2.1623222748815167,
+      "grad_norm": 0.0002600239240564406,
+      "learning_rate": 1.3971169036334913e-05,
+      "loss": 0.0,
+      "step": 3650
+    },
+    {
+      "epoch": 2.191943127962085,
+      "grad_norm": 0.00019250177138019353,
+      "learning_rate": 1.3477488151658769e-05,
+      "loss": 0.0,
+      "step": 3700
+    },
+    {
+      "epoch": 2.221563981042654,
+      "grad_norm": 0.00024143581686075777,
+      "learning_rate": 1.2983807266982623e-05,
+      "loss": 0.0,
+      "step": 3750
+    },
+    {
+      "epoch": 2.251184834123223,
+      "grad_norm": 0.00020565264276228845,
+      "learning_rate": 1.2490126382306478e-05,
+      "loss": 0.0,
+      "step": 3800
+    },
+    {
+      "epoch": 2.2808056872037916,
+      "grad_norm": 0.0002789797727018595,
+      "learning_rate": 1.1996445497630332e-05,
+      "loss": 0.0002,
+      "step": 3850
+    },
+    {
+      "epoch": 2.31042654028436,
+      "grad_norm": 0.0007835368160158396,
+      "learning_rate": 1.1502764612954187e-05,
+      "loss": 0.0007,
+      "step": 3900
+    },
+    {
+      "epoch": 2.340047393364929,
+      "grad_norm": 0.0004727982450276613,
+      "learning_rate": 1.1009083728278042e-05,
+      "loss": 0.0,
+      "step": 3950
+    },
+    {
+      "epoch": 2.3696682464454977,
+      "grad_norm": 0.0008073291974142194,
+      "learning_rate": 1.0515402843601896e-05,
+      "loss": 0.0,
+      "step": 4000
+    },
+    {
+      "epoch": 2.3992890995260665,
+      "grad_norm": 0.15108434855937958,
+      "learning_rate": 1.002172195892575e-05,
+      "loss": 0.0,
+      "step": 4050
+    },
+    {
+      "epoch": 2.428909952606635,
+      "grad_norm": 0.00033067440381273627,
+      "learning_rate": 9.528041074249605e-06,
+      "loss": 0.0,
+      "step": 4100
+    },
+    {
+      "epoch": 2.4585308056872037,
+      "grad_norm": 0.019667765125632286,
+      "learning_rate": 9.03436018957346e-06,
+      "loss": 0.0,
+      "step": 4150
+    },
+    {
+      "epoch": 2.4881516587677726,
+      "grad_norm": 0.000259611289948225,
+      "learning_rate": 8.540679304897314e-06,
+      "loss": 0.0,
+      "step": 4200
+    },
+    {
+      "epoch": 2.5177725118483414,
+      "grad_norm": 0.0002708205720409751,
+      "learning_rate": 8.046998420221169e-06,
+      "loss": 0.0003,
+      "step": 4250
+    },
+    {
+      "epoch": 2.5473933649289098,
+      "grad_norm": 0.0002382330858381465,
+      "learning_rate": 7.553317535545023e-06,
+      "loss": 0.0,
+      "step": 4300
+    },
+    {
+      "epoch": 2.5770142180094786,
+      "grad_norm": 0.0002609147340990603,
+      "learning_rate": 7.059636650868879e-06,
+      "loss": 0.0,
+      "step": 4350
+    },
+    {
+      "epoch": 2.6066350710900474,
+      "grad_norm": 0.00025002885377034545,
+      "learning_rate": 6.565955766192733e-06,
+      "loss": 0.0,
+      "step": 4400
+    },
+    {
+      "epoch": 2.6362559241706163,
+      "grad_norm": 0.00023832859005779028,
+      "learning_rate": 6.0722748815165886e-06,
+      "loss": 0.0,
+      "step": 4450
+    },
+    {
+      "epoch": 2.665876777251185,
+      "grad_norm": 0.0002076889795716852,
+      "learning_rate": 5.578593996840443e-06,
+      "loss": 0.0,
+      "step": 4500
+    },
+    {
+      "epoch": 2.6954976303317535,
+      "grad_norm": 0.0002478805836290121,
+      "learning_rate": 5.084913112164298e-06,
+      "loss": 0.0,
+      "step": 4550
+    },
+    {
+      "epoch": 2.7251184834123223,
+      "grad_norm": 0.00021155517606530339,
+      "learning_rate": 4.591232227488152e-06,
+      "loss": 0.0,
+      "step": 4600
+    },
+    {
+      "epoch": 2.754739336492891,
+      "grad_norm": 0.00021754855697508901,
+      "learning_rate": 4.097551342812007e-06,
+      "loss": 0.0,
+      "step": 4650
+    },
+    {
+      "epoch": 2.7843601895734595,
+      "grad_norm": 0.00020183408923912793,
+      "learning_rate": 3.6038704581358612e-06,
+      "loss": 0.0,
+      "step": 4700
+    },
+    {
+      "epoch": 2.8139810426540284,
+      "grad_norm": 0.00022289449407253414,
+      "learning_rate": 3.1101895734597158e-06,
+      "loss": 0.0,
+      "step": 4750
+    },
+    {
+      "epoch": 2.843601895734597,
+      "grad_norm": 0.007142237853258848,
+      "learning_rate": 2.6165086887835703e-06,
+      "loss": 0.0,
+      "step": 4800
+    },
+    {
+      "epoch": 2.873222748815166,
+      "grad_norm": 0.00019705097656697035,
+      "learning_rate": 2.122827804107425e-06,
+      "loss": 0.0,
+      "step": 4850
+    },
+    {
+      "epoch": 2.902843601895735,
+      "grad_norm": 0.00023452220193576068,
+      "learning_rate": 1.6291469194312798e-06,
+      "loss": 0.0001,
+      "step": 4900
+    },
+    {
+      "epoch": 2.9324644549763033,
+      "grad_norm": 0.000205856587854214,
+      "learning_rate": 1.1354660347551343e-06,
+      "loss": 0.0,
+      "step": 4950
+    },
+    {
+      "epoch": 2.962085308056872,
+      "grad_norm": 0.00020924191630911082,
+      "learning_rate": 6.41785150078989e-07,
+      "loss": 0.0,
+      "step": 5000
+    },
+    {
+      "epoch": 2.991706161137441,
+      "grad_norm": 0.00019217448425479233,
+      "learning_rate": 1.481042654028436e-07,
+      "loss": 0.0,
+      "step": 5050
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 5064,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1262694758161032.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-5064/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:42eacd77743b7a41ad451c542aa07b5056fe7c7f890f7ab016bc148113f71f09
+size 5713

config.json CHANGED Viewed

@@ -10,28 +10,28 @@
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
-    "0": "LABEL_0",
-    "1": "LABEL_1",
-    "2": "LABEL_2",
-    "3": "LABEL_3",
-    "4": "LABEL_4",
-    "5": "LABEL_5",
-    "6": "LABEL_6",
-    "7": "LABEL_7",
-    "8": "LABEL_8"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
-    "LABEL_0": 0,
-    "LABEL_1": 1,
-    "LABEL_2": 2,
-    "LABEL_3": 3,
-    "LABEL_4": 4,
-    "LABEL_5": 5,
-    "LABEL_6": 6,
-    "LABEL_7": 7,
-    "LABEL_8": 8
   },
   "layer_norm_eps": 1e-07,
   "legacy": true,

   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
+    "0": "O",
+    "1": "B-EDUCATION",
+    "2": "I-EDUCATION",
+    "3": "B-SKILL",
+    "4": "I-SKILL",
+    "5": "B-CERTIFICATION",
+    "6": "I-CERTIFICATION",
+    "7": "B-LANGUAGE",
+    "8": "I-LANGUAGE"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
+    "B-CERTIFICATION": 5,
+    "B-EDUCATION": 1,
+    "B-LANGUAGE": 7,
+    "B-SKILL": 3,
+    "I-CERTIFICATION": 6,
+    "I-EDUCATION": 2,
+    "I-LANGUAGE": 8,
+    "I-SKILL": 4,
+    "O": 0
   },
   "layer_norm_eps": 1e-07,
   "legacy": true,

tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 512,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:42eacd77743b7a41ad451c542aa07b5056fe7c7f890f7ab016bc148113f71f09
+size 5713