PuxAI committed on
Commit
1789d70
·
verified ·
1 Parent(s): be2ece8

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/config.json +129 -0
  2. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/model.safetensors +3 -0
  3. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/optimizer.pt +3 -0
  4. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/rng_state.pth +3 -0
  5. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/scheduler.pt +3 -0
  6. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/tokenizer.json +0 -0
  7. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/tokenizer_config.json +23 -0
  8. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/trainer_state.json +48 -0
  9. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/training_args.bin +3 -0
  10. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/config.json +129 -0
  11. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/model.safetensors +3 -0
  12. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/optimizer.pt +3 -0
  13. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/rng_state.pth +3 -0
  14. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/scheduler.pt +3 -0
  15. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/tokenizer.json +0 -0
  16. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/tokenizer_config.json +23 -0
  17. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/trainer_state.json +55 -0
  18. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/training_args.bin +3 -0
  19. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/config.json +129 -0
  20. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/model.safetensors +3 -0
  21. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/optimizer.pt +3 -0
  22. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/rng_state.pth +3 -0
  23. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/scheduler.pt +3 -0
  24. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/tokenizer.json +0 -0
  25. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/tokenizer_config.json +23 -0
  26. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/trainer_state.json +62 -0
  27. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/training_args.bin +3 -0
  28. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/config.json +129 -0
  29. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/model.safetensors +3 -0
  30. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/optimizer.pt +3 -0
  31. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/rng_state.pth +3 -0
  32. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/scheduler.pt +3 -0
  33. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/tokenizer.json +0 -0
  34. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/tokenizer_config.json +23 -0
  35. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/trainer_state.json +69 -0
  36. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/training_args.bin +3 -0
  37. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/config.json +129 -0
  38. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/model.safetensors +3 -0
  39. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/optimizer.pt +3 -0
  40. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/rng_state.pth +3 -0
  41. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/scheduler.pt +3 -0
  42. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/tokenizer.json +0 -0
  43. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/tokenizer_config.json +23 -0
  44. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/trainer_state.json +76 -0
  45. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/training_args.bin +3 -0
  46. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3500/config.json +129 -0
  47. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3500/model.safetensors +3 -0
  48. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3500/optimizer.pt +3 -0
  49. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3500/rng_state.pth +3 -0
  50. gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3500/scheduler.pt +3 -0
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/config.json ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaAdvancedSpanClassifier"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": null,
7
+ "dtype": "float32",
8
+ "eos_token_id": null,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "O",
14
+ "1": "ACCOUNT_NUMBER",
15
+ "2": "ADDRESS",
16
+ "3": "API_KEY",
17
+ "4": "BANK_ROUTING_NUMBER",
18
+ "5": "BIOMETRIC_IDENTIFIER",
19
+ "6": "CERTIFICATE_LICENSE_NUMBER",
20
+ "7": "CITY",
21
+ "8": "COMPANY_NAME",
22
+ "9": "COORDINATE",
23
+ "10": "COUNTRY",
24
+ "11": "CREDIT_CARD_NUMBER",
25
+ "12": "CUSTOMER_ID",
26
+ "13": "CVV",
27
+ "14": "DATE",
28
+ "15": "DATE_OF_BIRTH",
29
+ "16": "DATE_TIME",
30
+ "17": "DEVICE_IDENTIFIER",
31
+ "18": "EMAIL",
32
+ "19": "EMPLOYEE_ID",
33
+ "20": "FIRST_NAME",
34
+ "21": "HEALTH_PLAN_BENEFICIARY_NUMBER",
35
+ "22": "IPV4",
36
+ "23": "IPV6",
37
+ "24": "LAST_NAME",
38
+ "25": "LICENSE_PLATE",
39
+ "26": "MEDICAL_RECORD_NUMBER",
40
+ "27": "NAME",
41
+ "28": "NATIONAL_ID",
42
+ "29": "PASSWORD",
43
+ "30": "PHONE_NUMBER",
44
+ "31": "PIN",
45
+ "32": "POSTCODE",
46
+ "33": "SSN",
47
+ "34": "STATE",
48
+ "35": "STREET_ADDRESS",
49
+ "36": "SWIFT_BIC",
50
+ "37": "TAX_ID",
51
+ "38": "TIME",
52
+ "39": "UNIQUE_IDENTIFIER",
53
+ "40": "URL",
54
+ "41": "USER_NAME",
55
+ "42": "VEHICLE_IDENTIFIER"
56
+ },
57
+ "initializer_range": 0.02,
58
+ "intermediate_size": 3072,
59
+ "label2id": {
60
+ "ACCOUNT_NUMBER": 1,
61
+ "ADDRESS": 2,
62
+ "API_KEY": 3,
63
+ "BANK_ROUTING_NUMBER": 4,
64
+ "BIOMETRIC_IDENTIFIER": 5,
65
+ "CERTIFICATE_LICENSE_NUMBER": 6,
66
+ "CITY": 7,
67
+ "COMPANY_NAME": 8,
68
+ "COORDINATE": 9,
69
+ "COUNTRY": 10,
70
+ "CREDIT_CARD_NUMBER": 11,
71
+ "CUSTOMER_ID": 12,
72
+ "CVV": 13,
73
+ "DATE": 14,
74
+ "DATE_OF_BIRTH": 15,
75
+ "DATE_TIME": 16,
76
+ "DEVICE_IDENTIFIER": 17,
77
+ "EMAIL": 18,
78
+ "EMPLOYEE_ID": 19,
79
+ "FIRST_NAME": 20,
80
+ "HEALTH_PLAN_BENEFICIARY_NUMBER": 21,
81
+ "IPV4": 22,
82
+ "IPV6": 23,
83
+ "LAST_NAME": 24,
84
+ "LICENSE_PLATE": 25,
85
+ "MEDICAL_RECORD_NUMBER": 26,
86
+ "NAME": 27,
87
+ "NATIONAL_ID": 28,
88
+ "O": 0,
89
+ "PASSWORD": 29,
90
+ "PHONE_NUMBER": 30,
91
+ "PIN": 31,
92
+ "POSTCODE": 32,
93
+ "SSN": 33,
94
+ "STATE": 34,
95
+ "STREET_ADDRESS": 35,
96
+ "SWIFT_BIC": 36,
97
+ "TAX_ID": 37,
98
+ "TIME": 38,
99
+ "UNIQUE_IDENTIFIER": 39,
100
+ "URL": 40,
101
+ "USER_NAME": 41,
102
+ "VEHICLE_IDENTIFIER": 42
103
+ },
104
+ "layer_norm_eps": 1e-07,
105
+ "legacy": true,
106
+ "max_position_embeddings": 512,
107
+ "max_relative_positions": -1,
108
+ "model_type": "deberta-v2",
109
+ "norm_rel_ebd": "layer_norm",
110
+ "num_attention_heads": 12,
111
+ "num_hidden_layers": 12,
112
+ "pad_token_id": 0,
113
+ "pooler_dropout": 0,
114
+ "pooler_hidden_act": "gelu",
115
+ "pooler_hidden_size": 768,
116
+ "pos_att_type": [
117
+ "p2c",
118
+ "c2p"
119
+ ],
120
+ "position_biased_input": false,
121
+ "position_buckets": 256,
122
+ "relative_attention": true,
123
+ "share_att_key": true,
124
+ "tie_word_embeddings": true,
125
+ "transformers_version": "5.3.0",
126
+ "type_vocab_size": 0,
127
+ "use_cache": false,
128
+ "vocab_size": 128100
129
+ }
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4237f1837d361877e6781a9117acea7f99eba47f1741261dc0cf9e5eb85d0ef
3
+ size 741461252
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf17a7028a1cd8baba2e54ece7ac9f7ba40a55b91895c3f9977a8064a529a96c
3
+ size 1483046155
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf74db41c5b7817df072953eef5da7fdb172e7463221a43a10ef1835d0ac1ab3
3
+ size 14645
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d44bf9ae0e35689f4b9b151c53d5857013479ba8bbccaacae155d0657f7f880
3
+ size 1465
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "bos_token": "[CLS]",
5
+ "cls_token": "[CLS]",
6
+ "do_lower_case": false,
7
+ "eos_token": "[SEP]",
8
+ "extra_special_tokens": [
9
+ "[PAD]",
10
+ "[CLS]",
11
+ "[SEP]"
12
+ ],
13
+ "is_local": false,
14
+ "mask_token": "[MASK]",
15
+ "model_max_length": 1000000000000000019884624838656,
16
+ "pad_token": "[PAD]",
17
+ "sep_token": "[SEP]",
18
+ "split_by_punct": false,
19
+ "tokenizer_class": "DebertaV2Tokenizer",
20
+ "unk_id": 3,
21
+ "unk_token": "[UNK]",
22
+ "vocab_type": "spm"
23
+ }
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.32,
6
+ "eval_steps": 500,
7
+ "global_step": 1000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.16,
14
+ "grad_norm": 0.011844736523926258,
15
+ "learning_rate": 1.84032e-05,
16
+ "loss": 0.008585992813110352,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.32,
21
+ "grad_norm": 0.013846625573933125,
22
+ "learning_rate": 1.6803200000000002e-05,
23
+ "loss": 0.002212381362915039,
24
+ "step": 1000
25
+ }
26
+ ],
27
+ "logging_steps": 500,
28
+ "max_steps": 6250,
29
+ "num_input_tokens_seen": 0,
30
+ "num_train_epochs": 2,
31
+ "save_steps": 500,
32
+ "stateful_callbacks": {
33
+ "TrainerControl": {
34
+ "args": {
35
+ "should_epoch_stop": false,
36
+ "should_evaluate": false,
37
+ "should_log": false,
38
+ "should_save": true,
39
+ "should_training_stop": false
40
+ },
41
+ "attributes": {}
42
+ }
43
+ },
44
+ "total_flos": 1301031925789056.0,
45
+ "train_batch_size": 16,
46
+ "trial_name": null,
47
+ "trial_params": null
48
+ }
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1ca52d6960d0f230eb8c8694adae8d1e9f927f7543570b5d172d34722cb7745
3
+ size 5201
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/config.json ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaAdvancedSpanClassifier"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": null,
7
+ "dtype": "float32",
8
+ "eos_token_id": null,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "O",
14
+ "1": "ACCOUNT_NUMBER",
15
+ "2": "ADDRESS",
16
+ "3": "API_KEY",
17
+ "4": "BANK_ROUTING_NUMBER",
18
+ "5": "BIOMETRIC_IDENTIFIER",
19
+ "6": "CERTIFICATE_LICENSE_NUMBER",
20
+ "7": "CITY",
21
+ "8": "COMPANY_NAME",
22
+ "9": "COORDINATE",
23
+ "10": "COUNTRY",
24
+ "11": "CREDIT_CARD_NUMBER",
25
+ "12": "CUSTOMER_ID",
26
+ "13": "CVV",
27
+ "14": "DATE",
28
+ "15": "DATE_OF_BIRTH",
29
+ "16": "DATE_TIME",
30
+ "17": "DEVICE_IDENTIFIER",
31
+ "18": "EMAIL",
32
+ "19": "EMPLOYEE_ID",
33
+ "20": "FIRST_NAME",
34
+ "21": "HEALTH_PLAN_BENEFICIARY_NUMBER",
35
+ "22": "IPV4",
36
+ "23": "IPV6",
37
+ "24": "LAST_NAME",
38
+ "25": "LICENSE_PLATE",
39
+ "26": "MEDICAL_RECORD_NUMBER",
40
+ "27": "NAME",
41
+ "28": "NATIONAL_ID",
42
+ "29": "PASSWORD",
43
+ "30": "PHONE_NUMBER",
44
+ "31": "PIN",
45
+ "32": "POSTCODE",
46
+ "33": "SSN",
47
+ "34": "STATE",
48
+ "35": "STREET_ADDRESS",
49
+ "36": "SWIFT_BIC",
50
+ "37": "TAX_ID",
51
+ "38": "TIME",
52
+ "39": "UNIQUE_IDENTIFIER",
53
+ "40": "URL",
54
+ "41": "USER_NAME",
55
+ "42": "VEHICLE_IDENTIFIER"
56
+ },
57
+ "initializer_range": 0.02,
58
+ "intermediate_size": 3072,
59
+ "label2id": {
60
+ "ACCOUNT_NUMBER": 1,
61
+ "ADDRESS": 2,
62
+ "API_KEY": 3,
63
+ "BANK_ROUTING_NUMBER": 4,
64
+ "BIOMETRIC_IDENTIFIER": 5,
65
+ "CERTIFICATE_LICENSE_NUMBER": 6,
66
+ "CITY": 7,
67
+ "COMPANY_NAME": 8,
68
+ "COORDINATE": 9,
69
+ "COUNTRY": 10,
70
+ "CREDIT_CARD_NUMBER": 11,
71
+ "CUSTOMER_ID": 12,
72
+ "CVV": 13,
73
+ "DATE": 14,
74
+ "DATE_OF_BIRTH": 15,
75
+ "DATE_TIME": 16,
76
+ "DEVICE_IDENTIFIER": 17,
77
+ "EMAIL": 18,
78
+ "EMPLOYEE_ID": 19,
79
+ "FIRST_NAME": 20,
80
+ "HEALTH_PLAN_BENEFICIARY_NUMBER": 21,
81
+ "IPV4": 22,
82
+ "IPV6": 23,
83
+ "LAST_NAME": 24,
84
+ "LICENSE_PLATE": 25,
85
+ "MEDICAL_RECORD_NUMBER": 26,
86
+ "NAME": 27,
87
+ "NATIONAL_ID": 28,
88
+ "O": 0,
89
+ "PASSWORD": 29,
90
+ "PHONE_NUMBER": 30,
91
+ "PIN": 31,
92
+ "POSTCODE": 32,
93
+ "SSN": 33,
94
+ "STATE": 34,
95
+ "STREET_ADDRESS": 35,
96
+ "SWIFT_BIC": 36,
97
+ "TAX_ID": 37,
98
+ "TIME": 38,
99
+ "UNIQUE_IDENTIFIER": 39,
100
+ "URL": 40,
101
+ "USER_NAME": 41,
102
+ "VEHICLE_IDENTIFIER": 42
103
+ },
104
+ "layer_norm_eps": 1e-07,
105
+ "legacy": true,
106
+ "max_position_embeddings": 512,
107
+ "max_relative_positions": -1,
108
+ "model_type": "deberta-v2",
109
+ "norm_rel_ebd": "layer_norm",
110
+ "num_attention_heads": 12,
111
+ "num_hidden_layers": 12,
112
+ "pad_token_id": 0,
113
+ "pooler_dropout": 0,
114
+ "pooler_hidden_act": "gelu",
115
+ "pooler_hidden_size": 768,
116
+ "pos_att_type": [
117
+ "p2c",
118
+ "c2p"
119
+ ],
120
+ "position_biased_input": false,
121
+ "position_buckets": 256,
122
+ "relative_attention": true,
123
+ "share_att_key": true,
124
+ "tie_word_embeddings": true,
125
+ "transformers_version": "5.3.0",
126
+ "type_vocab_size": 0,
127
+ "use_cache": false,
128
+ "vocab_size": 128100
129
+ }
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74d6123dbe5c6d346d5f53ceefbe4be95b9564fdbbad9c7b1e9483d3f5ea3439
3
+ size 741461252
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43ac0deb4aeb4bc34384d0d06ee40b3f269c87304dcd8834f9e5224843823a0e
3
+ size 1483046155
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a76e6792ff3d3c8d3b3c20abdf9744c1fca8cd8ed91b0e171000217dfc3aa013
3
+ size 14645
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8d5af094227dc05503eb0677f18a85328e15cc5fe45df57e03d7adf5bf4a9cd
3
+ size 1465
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "bos_token": "[CLS]",
5
+ "cls_token": "[CLS]",
6
+ "do_lower_case": false,
7
+ "eos_token": "[SEP]",
8
+ "extra_special_tokens": [
9
+ "[PAD]",
10
+ "[CLS]",
11
+ "[SEP]"
12
+ ],
13
+ "is_local": false,
14
+ "mask_token": "[MASK]",
15
+ "model_max_length": 1000000000000000019884624838656,
16
+ "pad_token": "[PAD]",
17
+ "sep_token": "[SEP]",
18
+ "split_by_punct": false,
19
+ "tokenizer_class": "DebertaV2Tokenizer",
20
+ "unk_id": 3,
21
+ "unk_token": "[UNK]",
22
+ "vocab_type": "spm"
23
+ }
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.48,
6
+ "eval_steps": 500,
7
+ "global_step": 1500,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.16,
14
+ "grad_norm": 0.011844736523926258,
15
+ "learning_rate": 1.84032e-05,
16
+ "loss": 0.008585992813110352,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.32,
21
+ "grad_norm": 0.013846625573933125,
22
+ "learning_rate": 1.6803200000000002e-05,
23
+ "loss": 0.002212381362915039,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.48,
28
+ "grad_norm": 0.011384272016584873,
29
+ "learning_rate": 1.52032e-05,
30
+ "loss": 0.0010507731437683105,
31
+ "step": 1500
32
+ }
33
+ ],
34
+ "logging_steps": 500,
35
+ "max_steps": 6250,
36
+ "num_input_tokens_seen": 0,
37
+ "num_train_epochs": 2,
38
+ "save_steps": 500,
39
+ "stateful_callbacks": {
40
+ "TrainerControl": {
41
+ "args": {
42
+ "should_epoch_stop": false,
43
+ "should_evaluate": false,
44
+ "should_log": false,
45
+ "should_save": true,
46
+ "should_training_stop": false
47
+ },
48
+ "attributes": {}
49
+ }
50
+ },
51
+ "total_flos": 1961154629442240.0,
52
+ "train_batch_size": 16,
53
+ "trial_name": null,
54
+ "trial_params": null
55
+ }
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1ca52d6960d0f230eb8c8694adae8d1e9f927f7543570b5d172d34722cb7745
3
+ size 5201
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/config.json ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaAdvancedSpanClassifier"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": null,
7
+ "dtype": "float32",
8
+ "eos_token_id": null,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "O",
14
+ "1": "ACCOUNT_NUMBER",
15
+ "2": "ADDRESS",
16
+ "3": "API_KEY",
17
+ "4": "BANK_ROUTING_NUMBER",
18
+ "5": "BIOMETRIC_IDENTIFIER",
19
+ "6": "CERTIFICATE_LICENSE_NUMBER",
20
+ "7": "CITY",
21
+ "8": "COMPANY_NAME",
22
+ "9": "COORDINATE",
23
+ "10": "COUNTRY",
24
+ "11": "CREDIT_CARD_NUMBER",
25
+ "12": "CUSTOMER_ID",
26
+ "13": "CVV",
27
+ "14": "DATE",
28
+ "15": "DATE_OF_BIRTH",
29
+ "16": "DATE_TIME",
30
+ "17": "DEVICE_IDENTIFIER",
31
+ "18": "EMAIL",
32
+ "19": "EMPLOYEE_ID",
33
+ "20": "FIRST_NAME",
34
+ "21": "HEALTH_PLAN_BENEFICIARY_NUMBER",
35
+ "22": "IPV4",
36
+ "23": "IPV6",
37
+ "24": "LAST_NAME",
38
+ "25": "LICENSE_PLATE",
39
+ "26": "MEDICAL_RECORD_NUMBER",
40
+ "27": "NAME",
41
+ "28": "NATIONAL_ID",
42
+ "29": "PASSWORD",
43
+ "30": "PHONE_NUMBER",
44
+ "31": "PIN",
45
+ "32": "POSTCODE",
46
+ "33": "SSN",
47
+ "34": "STATE",
48
+ "35": "STREET_ADDRESS",
49
+ "36": "SWIFT_BIC",
50
+ "37": "TAX_ID",
51
+ "38": "TIME",
52
+ "39": "UNIQUE_IDENTIFIER",
53
+ "40": "URL",
54
+ "41": "USER_NAME",
55
+ "42": "VEHICLE_IDENTIFIER"
56
+ },
57
+ "initializer_range": 0.02,
58
+ "intermediate_size": 3072,
59
+ "label2id": {
60
+ "ACCOUNT_NUMBER": 1,
61
+ "ADDRESS": 2,
62
+ "API_KEY": 3,
63
+ "BANK_ROUTING_NUMBER": 4,
64
+ "BIOMETRIC_IDENTIFIER": 5,
65
+ "CERTIFICATE_LICENSE_NUMBER": 6,
66
+ "CITY": 7,
67
+ "COMPANY_NAME": 8,
68
+ "COORDINATE": 9,
69
+ "COUNTRY": 10,
70
+ "CREDIT_CARD_NUMBER": 11,
71
+ "CUSTOMER_ID": 12,
72
+ "CVV": 13,
73
+ "DATE": 14,
74
+ "DATE_OF_BIRTH": 15,
75
+ "DATE_TIME": 16,
76
+ "DEVICE_IDENTIFIER": 17,
77
+ "EMAIL": 18,
78
+ "EMPLOYEE_ID": 19,
79
+ "FIRST_NAME": 20,
80
+ "HEALTH_PLAN_BENEFICIARY_NUMBER": 21,
81
+ "IPV4": 22,
82
+ "IPV6": 23,
83
+ "LAST_NAME": 24,
84
+ "LICENSE_PLATE": 25,
85
+ "MEDICAL_RECORD_NUMBER": 26,
86
+ "NAME": 27,
87
+ "NATIONAL_ID": 28,
88
+ "O": 0,
89
+ "PASSWORD": 29,
90
+ "PHONE_NUMBER": 30,
91
+ "PIN": 31,
92
+ "POSTCODE": 32,
93
+ "SSN": 33,
94
+ "STATE": 34,
95
+ "STREET_ADDRESS": 35,
96
+ "SWIFT_BIC": 36,
97
+ "TAX_ID": 37,
98
+ "TIME": 38,
99
+ "UNIQUE_IDENTIFIER": 39,
100
+ "URL": 40,
101
+ "USER_NAME": 41,
102
+ "VEHICLE_IDENTIFIER": 42
103
+ },
104
+ "layer_norm_eps": 1e-07,
105
+ "legacy": true,
106
+ "max_position_embeddings": 512,
107
+ "max_relative_positions": -1,
108
+ "model_type": "deberta-v2",
109
+ "norm_rel_ebd": "layer_norm",
110
+ "num_attention_heads": 12,
111
+ "num_hidden_layers": 12,
112
+ "pad_token_id": 0,
113
+ "pooler_dropout": 0,
114
+ "pooler_hidden_act": "gelu",
115
+ "pooler_hidden_size": 768,
116
+ "pos_att_type": [
117
+ "p2c",
118
+ "c2p"
119
+ ],
120
+ "position_biased_input": false,
121
+ "position_buckets": 256,
122
+ "relative_attention": true,
123
+ "share_att_key": true,
124
+ "tie_word_embeddings": true,
125
+ "transformers_version": "5.3.0",
126
+ "type_vocab_size": 0,
127
+ "use_cache": false,
128
+ "vocab_size": 128100
129
+ }
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5310db55cb916868423be7b8ec128aa84a4b2d39e0e4f692cf07bfcc72cfd165
3
+ size 741461252
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6681bac0348c3b19cb9316ab440e5d1e4a3f5a62ee6183051e10d98c03becfd
3
+ size 1483046155
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e61cadd55a4beeb1d50a186bf8c5f1733657b791c75309d142151f646b119853
3
+ size 14645
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f14e0ac2b450c0127780c39f7bb444a58a497b4d63ae22e3e5987b44b3d0ddb
3
+ size 1465
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "bos_token": "[CLS]",
5
+ "cls_token": "[CLS]",
6
+ "do_lower_case": false,
7
+ "eos_token": "[SEP]",
8
+ "extra_special_tokens": [
9
+ "[PAD]",
10
+ "[CLS]",
11
+ "[SEP]"
12
+ ],
13
+ "is_local": false,
14
+ "mask_token": "[MASK]",
15
+ "model_max_length": 1000000000000000019884624838656,
16
+ "pad_token": "[PAD]",
17
+ "sep_token": "[SEP]",
18
+ "split_by_punct": false,
19
+ "tokenizer_class": "DebertaV2Tokenizer",
20
+ "unk_id": 3,
21
+ "unk_token": "[UNK]",
22
+ "vocab_type": "spm"
23
+ }
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.64,
6
+ "eval_steps": 500,
7
+ "global_step": 2000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.16,
14
+ "grad_norm": 0.011844736523926258,
15
+ "learning_rate": 1.84032e-05,
16
+ "loss": 0.008585992813110352,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.32,
21
+ "grad_norm": 0.013846625573933125,
22
+ "learning_rate": 1.6803200000000002e-05,
23
+ "loss": 0.002212381362915039,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.48,
28
+ "grad_norm": 0.011384272016584873,
29
+ "learning_rate": 1.52032e-05,
30
+ "loss": 0.0010507731437683105,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 0.64,
35
+ "grad_norm": 0.007055494002997875,
36
+ "learning_rate": 1.36032e-05,
37
+ "loss": 0.0007087976336479187,
38
+ "step": 2000
39
+ }
40
+ ],
41
+ "logging_steps": 500,
42
+ "max_steps": 6250,
43
+ "num_input_tokens_seen": 0,
44
+ "num_train_epochs": 2,
45
+ "save_steps": 500,
46
+ "stateful_callbacks": {
47
+ "TrainerControl": {
48
+ "args": {
49
+ "should_epoch_stop": false,
50
+ "should_evaluate": false,
51
+ "should_log": false,
52
+ "should_save": true,
53
+ "should_training_stop": false
54
+ },
55
+ "attributes": {}
56
+ }
57
+ },
58
+ "total_flos": 2618102787273792.0,
59
+ "train_batch_size": 16,
60
+ "trial_name": null,
61
+ "trial_params": null
62
+ }
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1ca52d6960d0f230eb8c8694adae8d1e9f927f7543570b5d172d34722cb7745
3
+ size 5201
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/config.json ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaAdvancedSpanClassifier"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": null,
7
+ "dtype": "float32",
8
+ "eos_token_id": null,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "O",
14
+ "1": "ACCOUNT_NUMBER",
15
+ "2": "ADDRESS",
16
+ "3": "API_KEY",
17
+ "4": "BANK_ROUTING_NUMBER",
18
+ "5": "BIOMETRIC_IDENTIFIER",
19
+ "6": "CERTIFICATE_LICENSE_NUMBER",
20
+ "7": "CITY",
21
+ "8": "COMPANY_NAME",
22
+ "9": "COORDINATE",
23
+ "10": "COUNTRY",
24
+ "11": "CREDIT_CARD_NUMBER",
25
+ "12": "CUSTOMER_ID",
26
+ "13": "CVV",
27
+ "14": "DATE",
28
+ "15": "DATE_OF_BIRTH",
29
+ "16": "DATE_TIME",
30
+ "17": "DEVICE_IDENTIFIER",
31
+ "18": "EMAIL",
32
+ "19": "EMPLOYEE_ID",
33
+ "20": "FIRST_NAME",
34
+ "21": "HEALTH_PLAN_BENEFICIARY_NUMBER",
35
+ "22": "IPV4",
36
+ "23": "IPV6",
37
+ "24": "LAST_NAME",
38
+ "25": "LICENSE_PLATE",
39
+ "26": "MEDICAL_RECORD_NUMBER",
40
+ "27": "NAME",
41
+ "28": "NATIONAL_ID",
42
+ "29": "PASSWORD",
43
+ "30": "PHONE_NUMBER",
44
+ "31": "PIN",
45
+ "32": "POSTCODE",
46
+ "33": "SSN",
47
+ "34": "STATE",
48
+ "35": "STREET_ADDRESS",
49
+ "36": "SWIFT_BIC",
50
+ "37": "TAX_ID",
51
+ "38": "TIME",
52
+ "39": "UNIQUE_IDENTIFIER",
53
+ "40": "URL",
54
+ "41": "USER_NAME",
55
+ "42": "VEHICLE_IDENTIFIER"
56
+ },
57
+ "initializer_range": 0.02,
58
+ "intermediate_size": 3072,
59
+ "label2id": {
60
+ "ACCOUNT_NUMBER": 1,
61
+ "ADDRESS": 2,
62
+ "API_KEY": 3,
63
+ "BANK_ROUTING_NUMBER": 4,
64
+ "BIOMETRIC_IDENTIFIER": 5,
65
+ "CERTIFICATE_LICENSE_NUMBER": 6,
66
+ "CITY": 7,
67
+ "COMPANY_NAME": 8,
68
+ "COORDINATE": 9,
69
+ "COUNTRY": 10,
70
+ "CREDIT_CARD_NUMBER": 11,
71
+ "CUSTOMER_ID": 12,
72
+ "CVV": 13,
73
+ "DATE": 14,
74
+ "DATE_OF_BIRTH": 15,
75
+ "DATE_TIME": 16,
76
+ "DEVICE_IDENTIFIER": 17,
77
+ "EMAIL": 18,
78
+ "EMPLOYEE_ID": 19,
79
+ "FIRST_NAME": 20,
80
+ "HEALTH_PLAN_BENEFICIARY_NUMBER": 21,
81
+ "IPV4": 22,
82
+ "IPV6": 23,
83
+ "LAST_NAME": 24,
84
+ "LICENSE_PLATE": 25,
85
+ "MEDICAL_RECORD_NUMBER": 26,
86
+ "NAME": 27,
87
+ "NATIONAL_ID": 28,
88
+ "O": 0,
89
+ "PASSWORD": 29,
90
+ "PHONE_NUMBER": 30,
91
+ "PIN": 31,
92
+ "POSTCODE": 32,
93
+ "SSN": 33,
94
+ "STATE": 34,
95
+ "STREET_ADDRESS": 35,
96
+ "SWIFT_BIC": 36,
97
+ "TAX_ID": 37,
98
+ "TIME": 38,
99
+ "UNIQUE_IDENTIFIER": 39,
100
+ "URL": 40,
101
+ "USER_NAME": 41,
102
+ "VEHICLE_IDENTIFIER": 42
103
+ },
104
+ "layer_norm_eps": 1e-07,
105
+ "legacy": true,
106
+ "max_position_embeddings": 512,
107
+ "max_relative_positions": -1,
108
+ "model_type": "deberta-v2",
109
+ "norm_rel_ebd": "layer_norm",
110
+ "num_attention_heads": 12,
111
+ "num_hidden_layers": 12,
112
+ "pad_token_id": 0,
113
+ "pooler_dropout": 0,
114
+ "pooler_hidden_act": "gelu",
115
+ "pooler_hidden_size": 768,
116
+ "pos_att_type": [
117
+ "p2c",
118
+ "c2p"
119
+ ],
120
+ "position_biased_input": false,
121
+ "position_buckets": 256,
122
+ "relative_attention": true,
123
+ "share_att_key": true,
124
+ "tie_word_embeddings": true,
125
+ "transformers_version": "5.3.0",
126
+ "type_vocab_size": 0,
127
+ "use_cache": false,
128
+ "vocab_size": 128100
129
+ }
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9606bfb37c6925a8caaabbc77b9066bb37688a0d1f8e6ba868f1d3b37a89f99c
3
+ size 741461252
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:038e93ef1aa9f8a2866a0283c0be275ce9c1c5c5c6057604ec309b3ad0da33bb
3
+ size 1483046155
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74892cb27af197f490ca09a20bb2ddf7298f99ac792bc6870b3a8a79d9f46054
3
+ size 14645
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13b6e80bb16702d3286273b917e5ed5ee9b60b5a0162456ced03516efe584365
3
+ size 1465
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "bos_token": "[CLS]",
5
+ "cls_token": "[CLS]",
6
+ "do_lower_case": false,
7
+ "eos_token": "[SEP]",
8
+ "extra_special_tokens": [
9
+ "[PAD]",
10
+ "[CLS]",
11
+ "[SEP]"
12
+ ],
13
+ "is_local": false,
14
+ "mask_token": "[MASK]",
15
+ "model_max_length": 1000000000000000019884624838656,
16
+ "pad_token": "[PAD]",
17
+ "sep_token": "[SEP]",
18
+ "split_by_punct": false,
19
+ "tokenizer_class": "DebertaV2Tokenizer",
20
+ "unk_id": 3,
21
+ "unk_token": "[UNK]",
22
+ "vocab_type": "spm"
23
+ }
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/trainer_state.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.8,
6
+ "eval_steps": 500,
7
+ "global_step": 2500,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.16,
14
+ "grad_norm": 0.011844736523926258,
15
+ "learning_rate": 1.84032e-05,
16
+ "loss": 0.008585992813110352,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.32,
21
+ "grad_norm": 0.013846625573933125,
22
+ "learning_rate": 1.6803200000000002e-05,
23
+ "loss": 0.002212381362915039,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.48,
28
+ "grad_norm": 0.011384272016584873,
29
+ "learning_rate": 1.52032e-05,
30
+ "loss": 0.0010507731437683105,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 0.64,
35
+ "grad_norm": 0.007055494002997875,
36
+ "learning_rate": 1.36032e-05,
37
+ "loss": 0.0007087976336479187,
38
+ "step": 2000
39
+ },
40
+ {
41
+ "epoch": 0.8,
42
+ "grad_norm": 0.008433870039880276,
43
+ "learning_rate": 1.2003200000000002e-05,
44
+ "loss": 0.0005563670992851257,
45
+ "step": 2500
46
+ }
47
+ ],
48
+ "logging_steps": 500,
49
+ "max_steps": 6250,
50
+ "num_input_tokens_seen": 0,
51
+ "num_train_epochs": 2,
52
+ "save_steps": 500,
53
+ "stateful_callbacks": {
54
+ "TrainerControl": {
55
+ "args": {
56
+ "should_epoch_stop": false,
57
+ "should_evaluate": false,
58
+ "should_log": false,
59
+ "should_save": true,
60
+ "should_training_stop": false
61
+ },
62
+ "attributes": {}
63
+ }
64
+ },
65
+ "total_flos": 3271660330719936.0,
66
+ "train_batch_size": 16,
67
+ "trial_name": null,
68
+ "trial_params": null
69
+ }
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-2500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1ca52d6960d0f230eb8c8694adae8d1e9f927f7543570b5d172d34722cb7745
3
+ size 5201
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/config.json ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaAdvancedSpanClassifier"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": null,
7
+ "dtype": "float32",
8
+ "eos_token_id": null,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "O",
14
+ "1": "ACCOUNT_NUMBER",
15
+ "2": "ADDRESS",
16
+ "3": "API_KEY",
17
+ "4": "BANK_ROUTING_NUMBER",
18
+ "5": "BIOMETRIC_IDENTIFIER",
19
+ "6": "CERTIFICATE_LICENSE_NUMBER",
20
+ "7": "CITY",
21
+ "8": "COMPANY_NAME",
22
+ "9": "COORDINATE",
23
+ "10": "COUNTRY",
24
+ "11": "CREDIT_CARD_NUMBER",
25
+ "12": "CUSTOMER_ID",
26
+ "13": "CVV",
27
+ "14": "DATE",
28
+ "15": "DATE_OF_BIRTH",
29
+ "16": "DATE_TIME",
30
+ "17": "DEVICE_IDENTIFIER",
31
+ "18": "EMAIL",
32
+ "19": "EMPLOYEE_ID",
33
+ "20": "FIRST_NAME",
34
+ "21": "HEALTH_PLAN_BENEFICIARY_NUMBER",
35
+ "22": "IPV4",
36
+ "23": "IPV6",
37
+ "24": "LAST_NAME",
38
+ "25": "LICENSE_PLATE",
39
+ "26": "MEDICAL_RECORD_NUMBER",
40
+ "27": "NAME",
41
+ "28": "NATIONAL_ID",
42
+ "29": "PASSWORD",
43
+ "30": "PHONE_NUMBER",
44
+ "31": "PIN",
45
+ "32": "POSTCODE",
46
+ "33": "SSN",
47
+ "34": "STATE",
48
+ "35": "STREET_ADDRESS",
49
+ "36": "SWIFT_BIC",
50
+ "37": "TAX_ID",
51
+ "38": "TIME",
52
+ "39": "UNIQUE_IDENTIFIER",
53
+ "40": "URL",
54
+ "41": "USER_NAME",
55
+ "42": "VEHICLE_IDENTIFIER"
56
+ },
57
+ "initializer_range": 0.02,
58
+ "intermediate_size": 3072,
59
+ "label2id": {
60
+ "ACCOUNT_NUMBER": 1,
61
+ "ADDRESS": 2,
62
+ "API_KEY": 3,
63
+ "BANK_ROUTING_NUMBER": 4,
64
+ "BIOMETRIC_IDENTIFIER": 5,
65
+ "CERTIFICATE_LICENSE_NUMBER": 6,
66
+ "CITY": 7,
67
+ "COMPANY_NAME": 8,
68
+ "COORDINATE": 9,
69
+ "COUNTRY": 10,
70
+ "CREDIT_CARD_NUMBER": 11,
71
+ "CUSTOMER_ID": 12,
72
+ "CVV": 13,
73
+ "DATE": 14,
74
+ "DATE_OF_BIRTH": 15,
75
+ "DATE_TIME": 16,
76
+ "DEVICE_IDENTIFIER": 17,
77
+ "EMAIL": 18,
78
+ "EMPLOYEE_ID": 19,
79
+ "FIRST_NAME": 20,
80
+ "HEALTH_PLAN_BENEFICIARY_NUMBER": 21,
81
+ "IPV4": 22,
82
+ "IPV6": 23,
83
+ "LAST_NAME": 24,
84
+ "LICENSE_PLATE": 25,
85
+ "MEDICAL_RECORD_NUMBER": 26,
86
+ "NAME": 27,
87
+ "NATIONAL_ID": 28,
88
+ "O": 0,
89
+ "PASSWORD": 29,
90
+ "PHONE_NUMBER": 30,
91
+ "PIN": 31,
92
+ "POSTCODE": 32,
93
+ "SSN": 33,
94
+ "STATE": 34,
95
+ "STREET_ADDRESS": 35,
96
+ "SWIFT_BIC": 36,
97
+ "TAX_ID": 37,
98
+ "TIME": 38,
99
+ "UNIQUE_IDENTIFIER": 39,
100
+ "URL": 40,
101
+ "USER_NAME": 41,
102
+ "VEHICLE_IDENTIFIER": 42
103
+ },
104
+ "layer_norm_eps": 1e-07,
105
+ "legacy": true,
106
+ "max_position_embeddings": 512,
107
+ "max_relative_positions": -1,
108
+ "model_type": "deberta-v2",
109
+ "norm_rel_ebd": "layer_norm",
110
+ "num_attention_heads": 12,
111
+ "num_hidden_layers": 12,
112
+ "pad_token_id": 0,
113
+ "pooler_dropout": 0,
114
+ "pooler_hidden_act": "gelu",
115
+ "pooler_hidden_size": 768,
116
+ "pos_att_type": [
117
+ "p2c",
118
+ "c2p"
119
+ ],
120
+ "position_biased_input": false,
121
+ "position_buckets": 256,
122
+ "relative_attention": true,
123
+ "share_att_key": true,
124
+ "tie_word_embeddings": true,
125
+ "transformers_version": "5.3.0",
126
+ "type_vocab_size": 0,
127
+ "use_cache": false,
128
+ "vocab_size": 128100
129
+ }
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e4fe55a8acfc191f81b4ce3380bcd71eb6fd8c9bae9cd432dbf8ff1cfbc4c64
3
+ size 741461252
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc0ee9d2d33e5ca4dfa468a97292ba35f058950c7dae9f3e75478320abd36fdc
3
+ size 1483046155
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad6c302abd5dd9bfbb411f772e51fadc95963573d569538dcfe173f7f16ea5a5
3
+ size 14645
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beb586565bc26fd4c3eb3ccda4841b5b0064e1d1a1fad7f54867d0b8ea11a633
3
+ size 1465
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "bos_token": "[CLS]",
5
+ "cls_token": "[CLS]",
6
+ "do_lower_case": false,
7
+ "eos_token": "[SEP]",
8
+ "extra_special_tokens": [
9
+ "[PAD]",
10
+ "[CLS]",
11
+ "[SEP]"
12
+ ],
13
+ "is_local": false,
14
+ "mask_token": "[MASK]",
15
+ "model_max_length": 1000000000000000019884624838656,
16
+ "pad_token": "[PAD]",
17
+ "sep_token": "[SEP]",
18
+ "split_by_punct": false,
19
+ "tokenizer_class": "DebertaV2Tokenizer",
20
+ "unk_id": 3,
21
+ "unk_token": "[UNK]",
22
+ "vocab_type": "spm"
23
+ }
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/trainer_state.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.96,
6
+ "eval_steps": 500,
7
+ "global_step": 3000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.16,
14
+ "grad_norm": 0.011844736523926258,
15
+ "learning_rate": 1.84032e-05,
16
+ "loss": 0.008585992813110352,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.32,
21
+ "grad_norm": 0.013846625573933125,
22
+ "learning_rate": 1.6803200000000002e-05,
23
+ "loss": 0.002212381362915039,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.48,
28
+ "grad_norm": 0.011384272016584873,
29
+ "learning_rate": 1.52032e-05,
30
+ "loss": 0.0010507731437683105,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 0.64,
35
+ "grad_norm": 0.007055494002997875,
36
+ "learning_rate": 1.36032e-05,
37
+ "loss": 0.0007087976336479187,
38
+ "step": 2000
39
+ },
40
+ {
41
+ "epoch": 0.8,
42
+ "grad_norm": 0.008433870039880276,
43
+ "learning_rate": 1.2003200000000002e-05,
44
+ "loss": 0.0005563670992851257,
45
+ "step": 2500
46
+ },
47
+ {
48
+ "epoch": 0.96,
49
+ "grad_norm": 0.008034170605242252,
50
+ "learning_rate": 1.04032e-05,
51
+ "loss": 0.0004688462913036346,
52
+ "step": 3000
53
+ }
54
+ ],
55
+ "logging_steps": 500,
56
+ "max_steps": 6250,
57
+ "num_input_tokens_seen": 0,
58
+ "num_train_epochs": 2,
59
+ "save_steps": 500,
60
+ "stateful_callbacks": {
61
+ "TrainerControl": {
62
+ "args": {
63
+ "should_epoch_stop": false,
64
+ "should_evaluate": false,
65
+ "should_log": false,
66
+ "should_save": true,
67
+ "should_training_stop": false
68
+ },
69
+ "attributes": {}
70
+ }
71
+ },
72
+ "total_flos": 3940334363301024.0,
73
+ "train_batch_size": 16,
74
+ "trial_name": null,
75
+ "trial_params": null
76
+ }
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1ca52d6960d0f230eb8c8694adae8d1e9f927f7543570b5d172d34722cb7745
3
+ size 5201
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3500/config.json ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaAdvancedSpanClassifier"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": null,
7
+ "dtype": "float32",
8
+ "eos_token_id": null,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "O",
14
+ "1": "ACCOUNT_NUMBER",
15
+ "2": "ADDRESS",
16
+ "3": "API_KEY",
17
+ "4": "BANK_ROUTING_NUMBER",
18
+ "5": "BIOMETRIC_IDENTIFIER",
19
+ "6": "CERTIFICATE_LICENSE_NUMBER",
20
+ "7": "CITY",
21
+ "8": "COMPANY_NAME",
22
+ "9": "COORDINATE",
23
+ "10": "COUNTRY",
24
+ "11": "CREDIT_CARD_NUMBER",
25
+ "12": "CUSTOMER_ID",
26
+ "13": "CVV",
27
+ "14": "DATE",
28
+ "15": "DATE_OF_BIRTH",
29
+ "16": "DATE_TIME",
30
+ "17": "DEVICE_IDENTIFIER",
31
+ "18": "EMAIL",
32
+ "19": "EMPLOYEE_ID",
33
+ "20": "FIRST_NAME",
34
+ "21": "HEALTH_PLAN_BENEFICIARY_NUMBER",
35
+ "22": "IPV4",
36
+ "23": "IPV6",
37
+ "24": "LAST_NAME",
38
+ "25": "LICENSE_PLATE",
39
+ "26": "MEDICAL_RECORD_NUMBER",
40
+ "27": "NAME",
41
+ "28": "NATIONAL_ID",
42
+ "29": "PASSWORD",
43
+ "30": "PHONE_NUMBER",
44
+ "31": "PIN",
45
+ "32": "POSTCODE",
46
+ "33": "SSN",
47
+ "34": "STATE",
48
+ "35": "STREET_ADDRESS",
49
+ "36": "SWIFT_BIC",
50
+ "37": "TAX_ID",
51
+ "38": "TIME",
52
+ "39": "UNIQUE_IDENTIFIER",
53
+ "40": "URL",
54
+ "41": "USER_NAME",
55
+ "42": "VEHICLE_IDENTIFIER"
56
+ },
57
+ "initializer_range": 0.02,
58
+ "intermediate_size": 3072,
59
+ "label2id": {
60
+ "ACCOUNT_NUMBER": 1,
61
+ "ADDRESS": 2,
62
+ "API_KEY": 3,
63
+ "BANK_ROUTING_NUMBER": 4,
64
+ "BIOMETRIC_IDENTIFIER": 5,
65
+ "CERTIFICATE_LICENSE_NUMBER": 6,
66
+ "CITY": 7,
67
+ "COMPANY_NAME": 8,
68
+ "COORDINATE": 9,
69
+ "COUNTRY": 10,
70
+ "CREDIT_CARD_NUMBER": 11,
71
+ "CUSTOMER_ID": 12,
72
+ "CVV": 13,
73
+ "DATE": 14,
74
+ "DATE_OF_BIRTH": 15,
75
+ "DATE_TIME": 16,
76
+ "DEVICE_IDENTIFIER": 17,
77
+ "EMAIL": 18,
78
+ "EMPLOYEE_ID": 19,
79
+ "FIRST_NAME": 20,
80
+ "HEALTH_PLAN_BENEFICIARY_NUMBER": 21,
81
+ "IPV4": 22,
82
+ "IPV6": 23,
83
+ "LAST_NAME": 24,
84
+ "LICENSE_PLATE": 25,
85
+ "MEDICAL_RECORD_NUMBER": 26,
86
+ "NAME": 27,
87
+ "NATIONAL_ID": 28,
88
+ "O": 0,
89
+ "PASSWORD": 29,
90
+ "PHONE_NUMBER": 30,
91
+ "PIN": 31,
92
+ "POSTCODE": 32,
93
+ "SSN": 33,
94
+ "STATE": 34,
95
+ "STREET_ADDRESS": 35,
96
+ "SWIFT_BIC": 36,
97
+ "TAX_ID": 37,
98
+ "TIME": 38,
99
+ "UNIQUE_IDENTIFIER": 39,
100
+ "URL": 40,
101
+ "USER_NAME": 41,
102
+ "VEHICLE_IDENTIFIER": 42
103
+ },
104
+ "layer_norm_eps": 1e-07,
105
+ "legacy": true,
106
+ "max_position_embeddings": 512,
107
+ "max_relative_positions": -1,
108
+ "model_type": "deberta-v2",
109
+ "norm_rel_ebd": "layer_norm",
110
+ "num_attention_heads": 12,
111
+ "num_hidden_layers": 12,
112
+ "pad_token_id": 0,
113
+ "pooler_dropout": 0,
114
+ "pooler_hidden_act": "gelu",
115
+ "pooler_hidden_size": 768,
116
+ "pos_att_type": [
117
+ "p2c",
118
+ "c2p"
119
+ ],
120
+ "position_biased_input": false,
121
+ "position_buckets": 256,
122
+ "relative_attention": true,
123
+ "share_att_key": true,
124
+ "tie_word_embeddings": true,
125
+ "transformers_version": "5.3.0",
126
+ "type_vocab_size": 0,
127
+ "use_cache": false,
128
+ "vocab_size": 128100
129
+ }
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3500/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdc4e59b6afa4797a97876c0c8f3bf86edb6a6ee1a29cf134d4fccb8f08c7ce8
3
+ size 741461252
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dce4cb16b39e3578b47cd73f216ae5dd173b8f5632afc545a3e6017e23f389d
3
+ size 1483046155
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28bf2fc1defefab1a58e90eeb5f0d97c2a1fc3b92c5ad6a3283081feeedb57f0
3
+ size 14645
gretel-pii-ready/SpanBased-CustomDeBERTa/checkpoint-3500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd2e8f148245e3729b1fb45295e0a70d35ecb27a3e6d29c1c1d24b6e733abd0
3
+ size 1465