CaffeineThief commited on
Commit
6d40aff
·
verified ·
1 Parent(s): 9c39ab2

Upload saved model files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. checkpoint-1048/config.json +130 -0
  2. checkpoint-1048/model.safetensors +3 -0
  3. checkpoint-1048/optimizer.pt +3 -0
  4. checkpoint-1048/rng_state.pth +3 -0
  5. checkpoint-1048/scaler.pt +3 -0
  6. checkpoint-1048/scheduler.pt +3 -0
  7. checkpoint-1048/special_tokens_map.json +37 -0
  8. checkpoint-1048/tokenizer.json +0 -0
  9. checkpoint-1048/tokenizer_config.json +56 -0
  10. checkpoint-1048/trainer_state.json +279 -0
  11. checkpoint-1048/training_args.bin +3 -0
  12. checkpoint-1048/vocab.txt +0 -0
  13. checkpoint-1179/config.json +130 -0
  14. checkpoint-1179/model.safetensors +3 -0
  15. checkpoint-1179/optimizer.pt +3 -0
  16. checkpoint-1179/rng_state.pth +3 -0
  17. checkpoint-1179/scaler.pt +3 -0
  18. checkpoint-1179/scheduler.pt +3 -0
  19. checkpoint-1179/special_tokens_map.json +37 -0
  20. checkpoint-1179/tokenizer.json +0 -0
  21. checkpoint-1179/tokenizer_config.json +56 -0
  22. checkpoint-1179/trainer_state.json +312 -0
  23. checkpoint-1179/training_args.bin +3 -0
  24. checkpoint-1179/vocab.txt +0 -0
  25. checkpoint-131/config.json +130 -0
  26. checkpoint-131/model.safetensors +3 -0
  27. checkpoint-131/optimizer.pt +3 -0
  28. checkpoint-131/rng_state.pth +3 -0
  29. checkpoint-131/scaler.pt +3 -0
  30. checkpoint-131/scheduler.pt +3 -0
  31. checkpoint-131/special_tokens_map.json +37 -0
  32. checkpoint-131/tokenizer.json +0 -0
  33. checkpoint-131/tokenizer_config.json +56 -0
  34. checkpoint-131/trainer_state.json +69 -0
  35. checkpoint-131/training_args.bin +3 -0
  36. checkpoint-131/vocab.txt +0 -0
  37. checkpoint-1310/config.json +130 -0
  38. checkpoint-1310/model.safetensors +3 -0
  39. checkpoint-1310/optimizer.pt +3 -0
  40. checkpoint-1310/rng_state.pth +3 -0
  41. checkpoint-1310/scaler.pt +3 -0
  42. checkpoint-1310/scheduler.pt +3 -0
  43. checkpoint-1310/special_tokens_map.json +37 -0
  44. checkpoint-1310/tokenizer.json +0 -0
  45. checkpoint-1310/tokenizer_config.json +56 -0
  46. checkpoint-1310/trainer_state.json +345 -0
  47. checkpoint-1310/training_args.bin +3 -0
  48. checkpoint-1310/vocab.txt +0 -0
  49. checkpoint-262/config.json +130 -0
  50. checkpoint-262/model.safetensors +3 -0
checkpoint-1048/config.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "T1003.001",
13
+ "1": "T1005",
14
+ "2": "T1012",
15
+ "3": "T1016",
16
+ "4": "T1021.001",
17
+ "5": "T1027",
18
+ "6": "T1033",
19
+ "7": "T1036.005",
20
+ "8": "T1041",
21
+ "9": "T1047",
22
+ "10": "T1053.005",
23
+ "11": "T1055",
24
+ "12": "T1056.001",
25
+ "13": "T1057",
26
+ "14": "T1059.003",
27
+ "15": "T1068",
28
+ "16": "T1070.004",
29
+ "17": "T1071.001",
30
+ "18": "T1072",
31
+ "19": "T1074.001",
32
+ "20": "T1078",
33
+ "21": "T1082",
34
+ "22": "T1083",
35
+ "23": "T1090",
36
+ "24": "T1095",
37
+ "25": "T1105",
38
+ "26": "T1106",
39
+ "27": "T1110",
40
+ "28": "T1112",
41
+ "29": "T1113",
42
+ "30": "T1140",
43
+ "31": "T1190",
44
+ "32": "T1204.002",
45
+ "33": "T1210",
46
+ "34": "T1218.011",
47
+ "35": "T1219",
48
+ "36": "T1484.001",
49
+ "37": "T1518.001",
50
+ "38": "T1543.003",
51
+ "39": "T1547.001",
52
+ "40": "T1548.002",
53
+ "41": "T1552.001",
54
+ "42": "T1557.001",
55
+ "43": "T1562.001",
56
+ "44": "T1564.001",
57
+ "45": "T1566.001",
58
+ "46": "T1569.002",
59
+ "47": "T1570",
60
+ "48": "T1573.001",
61
+ "49": "T1574.002"
62
+ },
63
+ "initializer_range": 0.02,
64
+ "intermediate_size": 3072,
65
+ "label2id": {
66
+ "T1003.001": 0,
67
+ "T1005": 1,
68
+ "T1012": 2,
69
+ "T1016": 3,
70
+ "T1021.001": 4,
71
+ "T1027": 5,
72
+ "T1033": 6,
73
+ "T1036.005": 7,
74
+ "T1041": 8,
75
+ "T1047": 9,
76
+ "T1053.005": 10,
77
+ "T1055": 11,
78
+ "T1056.001": 12,
79
+ "T1057": 13,
80
+ "T1059.003": 14,
81
+ "T1068": 15,
82
+ "T1070.004": 16,
83
+ "T1071.001": 17,
84
+ "T1072": 18,
85
+ "T1074.001": 19,
86
+ "T1078": 20,
87
+ "T1082": 21,
88
+ "T1083": 22,
89
+ "T1090": 23,
90
+ "T1095": 24,
91
+ "T1105": 25,
92
+ "T1106": 26,
93
+ "T1110": 27,
94
+ "T1112": 28,
95
+ "T1113": 29,
96
+ "T1140": 30,
97
+ "T1190": 31,
98
+ "T1204.002": 32,
99
+ "T1210": 33,
100
+ "T1218.011": 34,
101
+ "T1219": 35,
102
+ "T1484.001": 36,
103
+ "T1518.001": 37,
104
+ "T1543.003": 38,
105
+ "T1547.001": 39,
106
+ "T1548.002": 40,
107
+ "T1552.001": 41,
108
+ "T1557.001": 42,
109
+ "T1562.001": 43,
110
+ "T1564.001": 44,
111
+ "T1566.001": 45,
112
+ "T1569.002": 46,
113
+ "T1570": 47,
114
+ "T1573.001": 48,
115
+ "T1574.002": 49
116
+ },
117
+ "layer_norm_eps": 1e-12,
118
+ "max_position_embeddings": 512,
119
+ "model_type": "bert",
120
+ "num_attention_heads": 12,
121
+ "num_hidden_layers": 12,
122
+ "pad_token_id": 0,
123
+ "position_embedding_type": "absolute",
124
+ "problem_type": "multi_label_classification",
125
+ "torch_dtype": "float32",
126
+ "transformers_version": "4.55.2",
127
+ "type_vocab_size": 2,
128
+ "use_cache": true,
129
+ "vocab_size": 30522
130
+ }
checkpoint-1048/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cfcdc795c449fc1a254b91276ece8de434985c4a3768c3ffcb178872849d18b
3
+ size 438106296
checkpoint-1048/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4a85a5d906639547e756dfe39e4010d891e8cf80b5eb6fc66acfab1ad5e52fc
3
+ size 876333626
checkpoint-1048/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72cdc44e5b276de44fc6c8eb2fb7847347af06e107a341615d375b97cdca96e8
3
+ size 14244
checkpoint-1048/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
3
+ size 988
checkpoint-1048/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b38807a6792ac8ee07d51046be1a37c6cac67c83eb7c35b7404b57c7615dd81
3
+ size 1064
checkpoint-1048/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-1048/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1048/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-1048/trainer_state.json ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 917,
3
+ "best_metric": 0.4342857142857143,
4
+ "best_model_checkpoint": "./cysecbert-ttp-tram2_base_data/checkpoint-917",
5
+ "epoch": 8.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1048,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.3816793893129771,
14
+ "grad_norm": 36039.78125,
15
+ "learning_rate": 2.45e-05,
16
+ "loss": 0.5416,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.7633587786259542,
21
+ "grad_norm": 9505.58984375,
22
+ "learning_rate": 4.9500000000000004e-05,
23
+ "loss": 0.2038,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "eval_f1_macro": 0.0,
29
+ "eval_f1_micro": 0.0,
30
+ "eval_loss": 0.04570664092898369,
31
+ "eval_precision": 0.0,
32
+ "eval_recall": 0.0,
33
+ "eval_runtime": 10.7376,
34
+ "eval_samples_per_second": 355.759,
35
+ "eval_steps_per_second": 14.901,
36
+ "step": 131
37
+ },
38
+ {
39
+ "epoch": 1.1450381679389312,
40
+ "grad_norm": 9370.513671875,
41
+ "learning_rate": 4.981153846153847e-05,
42
+ "loss": 0.1094,
43
+ "step": 150
44
+ },
45
+ {
46
+ "epoch": 1.5267175572519083,
47
+ "grad_norm": 7157.39892578125,
48
+ "learning_rate": 4.961923076923077e-05,
49
+ "loss": 0.1075,
50
+ "step": 200
51
+ },
52
+ {
53
+ "epoch": 1.9083969465648853,
54
+ "grad_norm": 9901.6123046875,
55
+ "learning_rate": 4.942692307692308e-05,
56
+ "loss": 0.1026,
57
+ "step": 250
58
+ },
59
+ {
60
+ "epoch": 2.0,
61
+ "eval_f1_macro": 0.0,
62
+ "eval_f1_micro": 0.0,
63
+ "eval_loss": 0.04716825485229492,
64
+ "eval_precision": 0.0,
65
+ "eval_recall": 0.0,
66
+ "eval_runtime": 10.5515,
67
+ "eval_samples_per_second": 362.033,
68
+ "eval_steps_per_second": 15.164,
69
+ "step": 262
70
+ },
71
+ {
72
+ "epoch": 2.2900763358778624,
73
+ "grad_norm": 7338.14794921875,
74
+ "learning_rate": 4.923461538461539e-05,
75
+ "loss": 0.1003,
76
+ "step": 300
77
+ },
78
+ {
79
+ "epoch": 2.67175572519084,
80
+ "grad_norm": 10370.3720703125,
81
+ "learning_rate": 4.904230769230769e-05,
82
+ "loss": 0.0907,
83
+ "step": 350
84
+ },
85
+ {
86
+ "epoch": 3.0,
87
+ "eval_f1_macro": 0.04150333229523513,
88
+ "eval_f1_micro": 0.26657552973342447,
89
+ "eval_loss": 0.04108869284391403,
90
+ "eval_precision": 0.4431818181818182,
91
+ "eval_recall": 0.1906158357771261,
92
+ "eval_runtime": 10.6444,
93
+ "eval_samples_per_second": 358.876,
94
+ "eval_steps_per_second": 15.031,
95
+ "step": 393
96
+ },
97
+ {
98
+ "epoch": 3.053435114503817,
99
+ "grad_norm": 8239.685546875,
100
+ "learning_rate": 4.885e-05,
101
+ "loss": 0.0848,
102
+ "step": 400
103
+ },
104
+ {
105
+ "epoch": 3.435114503816794,
106
+ "grad_norm": 7363.1640625,
107
+ "learning_rate": 4.865769230769231e-05,
108
+ "loss": 0.074,
109
+ "step": 450
110
+ },
111
+ {
112
+ "epoch": 3.816793893129771,
113
+ "grad_norm": 10253.478515625,
114
+ "learning_rate": 4.8465384615384616e-05,
115
+ "loss": 0.0708,
116
+ "step": 500
117
+ },
118
+ {
119
+ "epoch": 4.0,
120
+ "eval_f1_macro": 0.0865137040218318,
121
+ "eval_f1_micro": 0.31753283837806967,
122
+ "eval_loss": 0.03653639927506447,
123
+ "eval_precision": 0.38186813186813184,
124
+ "eval_recall": 0.27174975562072334,
125
+ "eval_runtime": 10.3181,
126
+ "eval_samples_per_second": 370.223,
127
+ "eval_steps_per_second": 15.507,
128
+ "step": 524
129
+ },
130
+ {
131
+ "epoch": 4.198473282442748,
132
+ "grad_norm": 7632.10791015625,
133
+ "learning_rate": 4.827307692307693e-05,
134
+ "loss": 0.0632,
135
+ "step": 550
136
+ },
137
+ {
138
+ "epoch": 4.580152671755725,
139
+ "grad_norm": 9150.0556640625,
140
+ "learning_rate": 4.808076923076924e-05,
141
+ "loss": 0.0587,
142
+ "step": 600
143
+ },
144
+ {
145
+ "epoch": 4.961832061068702,
146
+ "grad_norm": 13003.1611328125,
147
+ "learning_rate": 4.788846153846154e-05,
148
+ "loss": 0.0536,
149
+ "step": 650
150
+ },
151
+ {
152
+ "epoch": 5.0,
153
+ "eval_f1_macro": 0.17183822511375405,
154
+ "eval_f1_micro": 0.33932302549101545,
155
+ "eval_loss": 0.03477426990866661,
156
+ "eval_precision": 0.29635036496350364,
157
+ "eval_recall": 0.396871945259042,
158
+ "eval_runtime": 10.2992,
159
+ "eval_samples_per_second": 370.901,
160
+ "eval_steps_per_second": 15.535,
161
+ "step": 655
162
+ },
163
+ {
164
+ "epoch": 5.34351145038168,
165
+ "grad_norm": 9582.8896484375,
166
+ "learning_rate": 4.7696153846153846e-05,
167
+ "loss": 0.0466,
168
+ "step": 700
169
+ },
170
+ {
171
+ "epoch": 5.7251908396946565,
172
+ "grad_norm": 10410.0546875,
173
+ "learning_rate": 4.750384615384616e-05,
174
+ "loss": 0.0427,
175
+ "step": 750
176
+ },
177
+ {
178
+ "epoch": 6.0,
179
+ "eval_f1_macro": 0.2823004051844873,
180
+ "eval_f1_micro": 0.4227574750830565,
181
+ "eval_loss": 0.03131383657455444,
182
+ "eval_precision": 0.3675090252707581,
183
+ "eval_recall": 0.4975562072336266,
184
+ "eval_runtime": 10.3222,
185
+ "eval_samples_per_second": 370.075,
186
+ "eval_steps_per_second": 15.501,
187
+ "step": 786
188
+ },
189
+ {
190
+ "epoch": 6.106870229007634,
191
+ "grad_norm": 8987.109375,
192
+ "learning_rate": 4.731153846153846e-05,
193
+ "loss": 0.0402,
194
+ "step": 800
195
+ },
196
+ {
197
+ "epoch": 6.488549618320611,
198
+ "grad_norm": 4473.130859375,
199
+ "learning_rate": 4.711923076923077e-05,
200
+ "loss": 0.0351,
201
+ "step": 850
202
+ },
203
+ {
204
+ "epoch": 6.870229007633588,
205
+ "grad_norm": 10426.6162109375,
206
+ "learning_rate": 4.692692307692308e-05,
207
+ "loss": 0.0337,
208
+ "step": 900
209
+ },
210
+ {
211
+ "epoch": 7.0,
212
+ "eval_f1_macro": 0.3363462665390104,
213
+ "eval_f1_micro": 0.4342857142857143,
214
+ "eval_loss": 0.03023417480289936,
215
+ "eval_precision": 0.35580524344569286,
216
+ "eval_recall": 0.5571847507331378,
217
+ "eval_runtime": 10.2654,
218
+ "eval_samples_per_second": 372.125,
219
+ "eval_steps_per_second": 15.586,
220
+ "step": 917
221
+ },
222
+ {
223
+ "epoch": 7.251908396946565,
224
+ "grad_norm": 8006.98779296875,
225
+ "learning_rate": 4.673461538461539e-05,
226
+ "loss": 0.0303,
227
+ "step": 950
228
+ },
229
+ {
230
+ "epoch": 7.633587786259542,
231
+ "grad_norm": 8583.732421875,
232
+ "learning_rate": 4.65423076923077e-05,
233
+ "loss": 0.0282,
234
+ "step": 1000
235
+ },
236
+ {
237
+ "epoch": 8.0,
238
+ "eval_f1_macro": 0.3390787375804454,
239
+ "eval_f1_micro": 0.41675248884311705,
240
+ "eval_loss": 0.031076261773705482,
241
+ "eval_precision": 0.32116402116402115,
242
+ "eval_recall": 0.5933528836754643,
243
+ "eval_runtime": 10.2543,
244
+ "eval_samples_per_second": 372.526,
245
+ "eval_steps_per_second": 15.603,
246
+ "step": 1048
247
+ }
248
+ ],
249
+ "logging_steps": 50,
250
+ "max_steps": 13100,
251
+ "num_input_tokens_seen": 0,
252
+ "num_train_epochs": 100,
253
+ "save_steps": 500,
254
+ "stateful_callbacks": {
255
+ "EarlyStoppingCallback": {
256
+ "args": {
257
+ "early_stopping_patience": 3,
258
+ "early_stopping_threshold": 0.0
259
+ },
260
+ "attributes": {
261
+ "early_stopping_patience_counter": 1
262
+ }
263
+ },
264
+ "TrainerControl": {
265
+ "args": {
266
+ "should_epoch_stop": false,
267
+ "should_evaluate": false,
268
+ "should_log": false,
269
+ "should_save": true,
270
+ "should_training_stop": false
271
+ },
272
+ "attributes": {}
273
+ }
274
+ },
275
+ "total_flos": 6589034407968768.0,
276
+ "train_batch_size": 24,
277
+ "trial_name": null,
278
+ "trial_params": null
279
+ }
checkpoint-1048/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6aee2fca7ff2c999f9ae67a083c9553706cd4b3e2f09a64c6ebac2a0bd6a82c0
3
+ size 5368
checkpoint-1048/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1179/config.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "T1003.001",
13
+ "1": "T1005",
14
+ "2": "T1012",
15
+ "3": "T1016",
16
+ "4": "T1021.001",
17
+ "5": "T1027",
18
+ "6": "T1033",
19
+ "7": "T1036.005",
20
+ "8": "T1041",
21
+ "9": "T1047",
22
+ "10": "T1053.005",
23
+ "11": "T1055",
24
+ "12": "T1056.001",
25
+ "13": "T1057",
26
+ "14": "T1059.003",
27
+ "15": "T1068",
28
+ "16": "T1070.004",
29
+ "17": "T1071.001",
30
+ "18": "T1072",
31
+ "19": "T1074.001",
32
+ "20": "T1078",
33
+ "21": "T1082",
34
+ "22": "T1083",
35
+ "23": "T1090",
36
+ "24": "T1095",
37
+ "25": "T1105",
38
+ "26": "T1106",
39
+ "27": "T1110",
40
+ "28": "T1112",
41
+ "29": "T1113",
42
+ "30": "T1140",
43
+ "31": "T1190",
44
+ "32": "T1204.002",
45
+ "33": "T1210",
46
+ "34": "T1218.011",
47
+ "35": "T1219",
48
+ "36": "T1484.001",
49
+ "37": "T1518.001",
50
+ "38": "T1543.003",
51
+ "39": "T1547.001",
52
+ "40": "T1548.002",
53
+ "41": "T1552.001",
54
+ "42": "T1557.001",
55
+ "43": "T1562.001",
56
+ "44": "T1564.001",
57
+ "45": "T1566.001",
58
+ "46": "T1569.002",
59
+ "47": "T1570",
60
+ "48": "T1573.001",
61
+ "49": "T1574.002"
62
+ },
63
+ "initializer_range": 0.02,
64
+ "intermediate_size": 3072,
65
+ "label2id": {
66
+ "T1003.001": 0,
67
+ "T1005": 1,
68
+ "T1012": 2,
69
+ "T1016": 3,
70
+ "T1021.001": 4,
71
+ "T1027": 5,
72
+ "T1033": 6,
73
+ "T1036.005": 7,
74
+ "T1041": 8,
75
+ "T1047": 9,
76
+ "T1053.005": 10,
77
+ "T1055": 11,
78
+ "T1056.001": 12,
79
+ "T1057": 13,
80
+ "T1059.003": 14,
81
+ "T1068": 15,
82
+ "T1070.004": 16,
83
+ "T1071.001": 17,
84
+ "T1072": 18,
85
+ "T1074.001": 19,
86
+ "T1078": 20,
87
+ "T1082": 21,
88
+ "T1083": 22,
89
+ "T1090": 23,
90
+ "T1095": 24,
91
+ "T1105": 25,
92
+ "T1106": 26,
93
+ "T1110": 27,
94
+ "T1112": 28,
95
+ "T1113": 29,
96
+ "T1140": 30,
97
+ "T1190": 31,
98
+ "T1204.002": 32,
99
+ "T1210": 33,
100
+ "T1218.011": 34,
101
+ "T1219": 35,
102
+ "T1484.001": 36,
103
+ "T1518.001": 37,
104
+ "T1543.003": 38,
105
+ "T1547.001": 39,
106
+ "T1548.002": 40,
107
+ "T1552.001": 41,
108
+ "T1557.001": 42,
109
+ "T1562.001": 43,
110
+ "T1564.001": 44,
111
+ "T1566.001": 45,
112
+ "T1569.002": 46,
113
+ "T1570": 47,
114
+ "T1573.001": 48,
115
+ "T1574.002": 49
116
+ },
117
+ "layer_norm_eps": 1e-12,
118
+ "max_position_embeddings": 512,
119
+ "model_type": "bert",
120
+ "num_attention_heads": 12,
121
+ "num_hidden_layers": 12,
122
+ "pad_token_id": 0,
123
+ "position_embedding_type": "absolute",
124
+ "problem_type": "multi_label_classification",
125
+ "torch_dtype": "float32",
126
+ "transformers_version": "4.55.2",
127
+ "type_vocab_size": 2,
128
+ "use_cache": true,
129
+ "vocab_size": 30522
130
+ }
checkpoint-1179/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5822958b26f02fa3028798bf655fdb17f1c0733f7b9d23a9f8a444a69b42a8a
3
+ size 438106296
checkpoint-1179/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef34d9bd48627f8b229cfe1fe0045bd65de0405bfa14b23bd72bcb44c7ba01d6
3
+ size 876333626
checkpoint-1179/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e9f091d267fe28797711d5a79086536ecdc91656f6d0da00338fefe355f1c9e
3
+ size 14244
checkpoint-1179/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
3
+ size 988
checkpoint-1179/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b16f7e1dabff2e10dc9bb8c4961288eb033720484f93d6ea2266d86beed88786
3
+ size 1064
checkpoint-1179/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-1179/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1179/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-1179/trainer_state.json ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 917,
3
+ "best_metric": 0.4342857142857143,
4
+ "best_model_checkpoint": "./cysecbert-ttp-tram2_base_data/checkpoint-917",
5
+ "epoch": 9.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1179,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.3816793893129771,
14
+ "grad_norm": 36039.78125,
15
+ "learning_rate": 2.45e-05,
16
+ "loss": 0.5416,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.7633587786259542,
21
+ "grad_norm": 9505.58984375,
22
+ "learning_rate": 4.9500000000000004e-05,
23
+ "loss": 0.2038,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "eval_f1_macro": 0.0,
29
+ "eval_f1_micro": 0.0,
30
+ "eval_loss": 0.04570664092898369,
31
+ "eval_precision": 0.0,
32
+ "eval_recall": 0.0,
33
+ "eval_runtime": 10.7376,
34
+ "eval_samples_per_second": 355.759,
35
+ "eval_steps_per_second": 14.901,
36
+ "step": 131
37
+ },
38
+ {
39
+ "epoch": 1.1450381679389312,
40
+ "grad_norm": 9370.513671875,
41
+ "learning_rate": 4.981153846153847e-05,
42
+ "loss": 0.1094,
43
+ "step": 150
44
+ },
45
+ {
46
+ "epoch": 1.5267175572519083,
47
+ "grad_norm": 7157.39892578125,
48
+ "learning_rate": 4.961923076923077e-05,
49
+ "loss": 0.1075,
50
+ "step": 200
51
+ },
52
+ {
53
+ "epoch": 1.9083969465648853,
54
+ "grad_norm": 9901.6123046875,
55
+ "learning_rate": 4.942692307692308e-05,
56
+ "loss": 0.1026,
57
+ "step": 250
58
+ },
59
+ {
60
+ "epoch": 2.0,
61
+ "eval_f1_macro": 0.0,
62
+ "eval_f1_micro": 0.0,
63
+ "eval_loss": 0.04716825485229492,
64
+ "eval_precision": 0.0,
65
+ "eval_recall": 0.0,
66
+ "eval_runtime": 10.5515,
67
+ "eval_samples_per_second": 362.033,
68
+ "eval_steps_per_second": 15.164,
69
+ "step": 262
70
+ },
71
+ {
72
+ "epoch": 2.2900763358778624,
73
+ "grad_norm": 7338.14794921875,
74
+ "learning_rate": 4.923461538461539e-05,
75
+ "loss": 0.1003,
76
+ "step": 300
77
+ },
78
+ {
79
+ "epoch": 2.67175572519084,
80
+ "grad_norm": 10370.3720703125,
81
+ "learning_rate": 4.904230769230769e-05,
82
+ "loss": 0.0907,
83
+ "step": 350
84
+ },
85
+ {
86
+ "epoch": 3.0,
87
+ "eval_f1_macro": 0.04150333229523513,
88
+ "eval_f1_micro": 0.26657552973342447,
89
+ "eval_loss": 0.04108869284391403,
90
+ "eval_precision": 0.4431818181818182,
91
+ "eval_recall": 0.1906158357771261,
92
+ "eval_runtime": 10.6444,
93
+ "eval_samples_per_second": 358.876,
94
+ "eval_steps_per_second": 15.031,
95
+ "step": 393
96
+ },
97
+ {
98
+ "epoch": 3.053435114503817,
99
+ "grad_norm": 8239.685546875,
100
+ "learning_rate": 4.885e-05,
101
+ "loss": 0.0848,
102
+ "step": 400
103
+ },
104
+ {
105
+ "epoch": 3.435114503816794,
106
+ "grad_norm": 7363.1640625,
107
+ "learning_rate": 4.865769230769231e-05,
108
+ "loss": 0.074,
109
+ "step": 450
110
+ },
111
+ {
112
+ "epoch": 3.816793893129771,
113
+ "grad_norm": 10253.478515625,
114
+ "learning_rate": 4.8465384615384616e-05,
115
+ "loss": 0.0708,
116
+ "step": 500
117
+ },
118
+ {
119
+ "epoch": 4.0,
120
+ "eval_f1_macro": 0.0865137040218318,
121
+ "eval_f1_micro": 0.31753283837806967,
122
+ "eval_loss": 0.03653639927506447,
123
+ "eval_precision": 0.38186813186813184,
124
+ "eval_recall": 0.27174975562072334,
125
+ "eval_runtime": 10.3181,
126
+ "eval_samples_per_second": 370.223,
127
+ "eval_steps_per_second": 15.507,
128
+ "step": 524
129
+ },
130
+ {
131
+ "epoch": 4.198473282442748,
132
+ "grad_norm": 7632.10791015625,
133
+ "learning_rate": 4.827307692307693e-05,
134
+ "loss": 0.0632,
135
+ "step": 550
136
+ },
137
+ {
138
+ "epoch": 4.580152671755725,
139
+ "grad_norm": 9150.0556640625,
140
+ "learning_rate": 4.808076923076924e-05,
141
+ "loss": 0.0587,
142
+ "step": 600
143
+ },
144
+ {
145
+ "epoch": 4.961832061068702,
146
+ "grad_norm": 13003.1611328125,
147
+ "learning_rate": 4.788846153846154e-05,
148
+ "loss": 0.0536,
149
+ "step": 650
150
+ },
151
+ {
152
+ "epoch": 5.0,
153
+ "eval_f1_macro": 0.17183822511375405,
154
+ "eval_f1_micro": 0.33932302549101545,
155
+ "eval_loss": 0.03477426990866661,
156
+ "eval_precision": 0.29635036496350364,
157
+ "eval_recall": 0.396871945259042,
158
+ "eval_runtime": 10.2992,
159
+ "eval_samples_per_second": 370.901,
160
+ "eval_steps_per_second": 15.535,
161
+ "step": 655
162
+ },
163
+ {
164
+ "epoch": 5.34351145038168,
165
+ "grad_norm": 9582.8896484375,
166
+ "learning_rate": 4.7696153846153846e-05,
167
+ "loss": 0.0466,
168
+ "step": 700
169
+ },
170
+ {
171
+ "epoch": 5.7251908396946565,
172
+ "grad_norm": 10410.0546875,
173
+ "learning_rate": 4.750384615384616e-05,
174
+ "loss": 0.0427,
175
+ "step": 750
176
+ },
177
+ {
178
+ "epoch": 6.0,
179
+ "eval_f1_macro": 0.2823004051844873,
180
+ "eval_f1_micro": 0.4227574750830565,
181
+ "eval_loss": 0.03131383657455444,
182
+ "eval_precision": 0.3675090252707581,
183
+ "eval_recall": 0.4975562072336266,
184
+ "eval_runtime": 10.3222,
185
+ "eval_samples_per_second": 370.075,
186
+ "eval_steps_per_second": 15.501,
187
+ "step": 786
188
+ },
189
+ {
190
+ "epoch": 6.106870229007634,
191
+ "grad_norm": 8987.109375,
192
+ "learning_rate": 4.731153846153846e-05,
193
+ "loss": 0.0402,
194
+ "step": 800
195
+ },
196
+ {
197
+ "epoch": 6.488549618320611,
198
+ "grad_norm": 4473.130859375,
199
+ "learning_rate": 4.711923076923077e-05,
200
+ "loss": 0.0351,
201
+ "step": 850
202
+ },
203
+ {
204
+ "epoch": 6.870229007633588,
205
+ "grad_norm": 10426.6162109375,
206
+ "learning_rate": 4.692692307692308e-05,
207
+ "loss": 0.0337,
208
+ "step": 900
209
+ },
210
+ {
211
+ "epoch": 7.0,
212
+ "eval_f1_macro": 0.3363462665390104,
213
+ "eval_f1_micro": 0.4342857142857143,
214
+ "eval_loss": 0.03023417480289936,
215
+ "eval_precision": 0.35580524344569286,
216
+ "eval_recall": 0.5571847507331378,
217
+ "eval_runtime": 10.2654,
218
+ "eval_samples_per_second": 372.125,
219
+ "eval_steps_per_second": 15.586,
220
+ "step": 917
221
+ },
222
+ {
223
+ "epoch": 7.251908396946565,
224
+ "grad_norm": 8006.98779296875,
225
+ "learning_rate": 4.673461538461539e-05,
226
+ "loss": 0.0303,
227
+ "step": 950
228
+ },
229
+ {
230
+ "epoch": 7.633587786259542,
231
+ "grad_norm": 8583.732421875,
232
+ "learning_rate": 4.65423076923077e-05,
233
+ "loss": 0.0282,
234
+ "step": 1000
235
+ },
236
+ {
237
+ "epoch": 8.0,
238
+ "eval_f1_macro": 0.3390787375804454,
239
+ "eval_f1_micro": 0.41675248884311705,
240
+ "eval_loss": 0.031076261773705482,
241
+ "eval_precision": 0.32116402116402115,
242
+ "eval_recall": 0.5933528836754643,
243
+ "eval_runtime": 10.2543,
244
+ "eval_samples_per_second": 372.526,
245
+ "eval_steps_per_second": 15.603,
246
+ "step": 1048
247
+ },
248
+ {
249
+ "epoch": 8.01526717557252,
250
+ "grad_norm": 5588.6708984375,
251
+ "learning_rate": 4.635e-05,
252
+ "loss": 0.0281,
253
+ "step": 1050
254
+ },
255
+ {
256
+ "epoch": 8.396946564885496,
257
+ "grad_norm": 5759.66650390625,
258
+ "learning_rate": 4.6157692307692306e-05,
259
+ "loss": 0.0236,
260
+ "step": 1100
261
+ },
262
+ {
263
+ "epoch": 8.778625954198473,
264
+ "grad_norm": 7545.55810546875,
265
+ "learning_rate": 4.596538461538462e-05,
266
+ "loss": 0.0234,
267
+ "step": 1150
268
+ },
269
+ {
270
+ "epoch": 9.0,
271
+ "eval_f1_macro": 0.3290051589603852,
272
+ "eval_f1_micro": 0.42324983943481054,
273
+ "eval_loss": 0.03171388432383537,
274
+ "eval_precision": 0.3151602104256337,
275
+ "eval_recall": 0.6441837732160313,
276
+ "eval_runtime": 10.2276,
277
+ "eval_samples_per_second": 373.498,
278
+ "eval_steps_per_second": 15.644,
279
+ "step": 1179
280
+ }
281
+ ],
282
+ "logging_steps": 50,
283
+ "max_steps": 13100,
284
+ "num_input_tokens_seen": 0,
285
+ "num_train_epochs": 100,
286
+ "save_steps": 500,
287
+ "stateful_callbacks": {
288
+ "EarlyStoppingCallback": {
289
+ "args": {
290
+ "early_stopping_patience": 3,
291
+ "early_stopping_threshold": 0.0
292
+ },
293
+ "attributes": {
294
+ "early_stopping_patience_counter": 2
295
+ }
296
+ },
297
+ "TrainerControl": {
298
+ "args": {
299
+ "should_epoch_stop": false,
300
+ "should_evaluate": false,
301
+ "should_log": false,
302
+ "should_save": true,
303
+ "should_training_stop": false
304
+ },
305
+ "attributes": {}
306
+ }
307
+ },
308
+ "total_flos": 7412663708964864.0,
309
+ "train_batch_size": 24,
310
+ "trial_name": null,
311
+ "trial_params": null
312
+ }
checkpoint-1179/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6aee2fca7ff2c999f9ae67a083c9553706cd4b3e2f09a64c6ebac2a0bd6a82c0
3
+ size 5368
checkpoint-1179/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-131/config.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "T1003.001",
13
+ "1": "T1005",
14
+ "2": "T1012",
15
+ "3": "T1016",
16
+ "4": "T1021.001",
17
+ "5": "T1027",
18
+ "6": "T1033",
19
+ "7": "T1036.005",
20
+ "8": "T1041",
21
+ "9": "T1047",
22
+ "10": "T1053.005",
23
+ "11": "T1055",
24
+ "12": "T1056.001",
25
+ "13": "T1057",
26
+ "14": "T1059.003",
27
+ "15": "T1068",
28
+ "16": "T1070.004",
29
+ "17": "T1071.001",
30
+ "18": "T1072",
31
+ "19": "T1074.001",
32
+ "20": "T1078",
33
+ "21": "T1082",
34
+ "22": "T1083",
35
+ "23": "T1090",
36
+ "24": "T1095",
37
+ "25": "T1105",
38
+ "26": "T1106",
39
+ "27": "T1110",
40
+ "28": "T1112",
41
+ "29": "T1113",
42
+ "30": "T1140",
43
+ "31": "T1190",
44
+ "32": "T1204.002",
45
+ "33": "T1210",
46
+ "34": "T1218.011",
47
+ "35": "T1219",
48
+ "36": "T1484.001",
49
+ "37": "T1518.001",
50
+ "38": "T1543.003",
51
+ "39": "T1547.001",
52
+ "40": "T1548.002",
53
+ "41": "T1552.001",
54
+ "42": "T1557.001",
55
+ "43": "T1562.001",
56
+ "44": "T1564.001",
57
+ "45": "T1566.001",
58
+ "46": "T1569.002",
59
+ "47": "T1570",
60
+ "48": "T1573.001",
61
+ "49": "T1574.002"
62
+ },
63
+ "initializer_range": 0.02,
64
+ "intermediate_size": 3072,
65
+ "label2id": {
66
+ "T1003.001": 0,
67
+ "T1005": 1,
68
+ "T1012": 2,
69
+ "T1016": 3,
70
+ "T1021.001": 4,
71
+ "T1027": 5,
72
+ "T1033": 6,
73
+ "T1036.005": 7,
74
+ "T1041": 8,
75
+ "T1047": 9,
76
+ "T1053.005": 10,
77
+ "T1055": 11,
78
+ "T1056.001": 12,
79
+ "T1057": 13,
80
+ "T1059.003": 14,
81
+ "T1068": 15,
82
+ "T1070.004": 16,
83
+ "T1071.001": 17,
84
+ "T1072": 18,
85
+ "T1074.001": 19,
86
+ "T1078": 20,
87
+ "T1082": 21,
88
+ "T1083": 22,
89
+ "T1090": 23,
90
+ "T1095": 24,
91
+ "T1105": 25,
92
+ "T1106": 26,
93
+ "T1110": 27,
94
+ "T1112": 28,
95
+ "T1113": 29,
96
+ "T1140": 30,
97
+ "T1190": 31,
98
+ "T1204.002": 32,
99
+ "T1210": 33,
100
+ "T1218.011": 34,
101
+ "T1219": 35,
102
+ "T1484.001": 36,
103
+ "T1518.001": 37,
104
+ "T1543.003": 38,
105
+ "T1547.001": 39,
106
+ "T1548.002": 40,
107
+ "T1552.001": 41,
108
+ "T1557.001": 42,
109
+ "T1562.001": 43,
110
+ "T1564.001": 44,
111
+ "T1566.001": 45,
112
+ "T1569.002": 46,
113
+ "T1570": 47,
114
+ "T1573.001": 48,
115
+ "T1574.002": 49
116
+ },
117
+ "layer_norm_eps": 1e-12,
118
+ "max_position_embeddings": 512,
119
+ "model_type": "bert",
120
+ "num_attention_heads": 12,
121
+ "num_hidden_layers": 12,
122
+ "pad_token_id": 0,
123
+ "position_embedding_type": "absolute",
124
+ "problem_type": "multi_label_classification",
125
+ "torch_dtype": "float32",
126
+ "transformers_version": "4.55.2",
127
+ "type_vocab_size": 2,
128
+ "use_cache": true,
129
+ "vocab_size": 30522
130
+ }
checkpoint-131/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:932d9c9d68222f910360bc25b11a32355f772528e78b42c8608a153514f22ca7
3
+ size 438106296
checkpoint-131/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:934f79c25a2482f702912af7b204a859760484dfd623b22ad4aee588a9318cfc
3
+ size 876333626
checkpoint-131/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b892a0b2a0c1858081f0f8d98362d2bbb7b305a53aa9ef9213f44154195f395
3
+ size 14244
checkpoint-131/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
3
+ size 988
checkpoint-131/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1066e0f7a6b0ffccfd8a3169b32675cd48308393d4535ec5b4f45051c38a6f81
3
+ size 1064
checkpoint-131/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-131/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-131/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-131/trainer_state.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 131,
3
+ "best_metric": 0.0,
4
+ "best_model_checkpoint": "./cysecbert-ttp-tram2_base_data/checkpoint-131",
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 131,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.3816793893129771,
14
+ "grad_norm": 36039.78125,
15
+ "learning_rate": 2.45e-05,
16
+ "loss": 0.5416,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.7633587786259542,
21
+ "grad_norm": 9505.58984375,
22
+ "learning_rate": 4.9500000000000004e-05,
23
+ "loss": 0.2038,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "eval_f1_macro": 0.0,
29
+ "eval_f1_micro": 0.0,
30
+ "eval_loss": 0.04570664092898369,
31
+ "eval_precision": 0.0,
32
+ "eval_recall": 0.0,
33
+ "eval_runtime": 10.7376,
34
+ "eval_samples_per_second": 355.759,
35
+ "eval_steps_per_second": 14.901,
36
+ "step": 131
37
+ }
38
+ ],
39
+ "logging_steps": 50,
40
+ "max_steps": 13100,
41
+ "num_input_tokens_seen": 0,
42
+ "num_train_epochs": 100,
43
+ "save_steps": 500,
44
+ "stateful_callbacks": {
45
+ "EarlyStoppingCallback": {
46
+ "args": {
47
+ "early_stopping_patience": 3,
48
+ "early_stopping_threshold": 0.0
49
+ },
50
+ "attributes": {
51
+ "early_stopping_patience_counter": 0
52
+ }
53
+ },
54
+ "TrainerControl": {
55
+ "args": {
56
+ "should_epoch_stop": false,
57
+ "should_evaluate": false,
58
+ "should_log": false,
59
+ "should_save": true,
60
+ "should_training_stop": false
61
+ },
62
+ "attributes": {}
63
+ }
64
+ },
65
+ "total_flos": 823629300996096.0,
66
+ "train_batch_size": 24,
67
+ "trial_name": null,
68
+ "trial_params": null
69
+ }
checkpoint-131/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6aee2fca7ff2c999f9ae67a083c9553706cd4b3e2f09a64c6ebac2a0bd6a82c0
3
+ size 5368
checkpoint-131/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1310/config.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "T1003.001",
13
+ "1": "T1005",
14
+ "2": "T1012",
15
+ "3": "T1016",
16
+ "4": "T1021.001",
17
+ "5": "T1027",
18
+ "6": "T1033",
19
+ "7": "T1036.005",
20
+ "8": "T1041",
21
+ "9": "T1047",
22
+ "10": "T1053.005",
23
+ "11": "T1055",
24
+ "12": "T1056.001",
25
+ "13": "T1057",
26
+ "14": "T1059.003",
27
+ "15": "T1068",
28
+ "16": "T1070.004",
29
+ "17": "T1071.001",
30
+ "18": "T1072",
31
+ "19": "T1074.001",
32
+ "20": "T1078",
33
+ "21": "T1082",
34
+ "22": "T1083",
35
+ "23": "T1090",
36
+ "24": "T1095",
37
+ "25": "T1105",
38
+ "26": "T1106",
39
+ "27": "T1110",
40
+ "28": "T1112",
41
+ "29": "T1113",
42
+ "30": "T1140",
43
+ "31": "T1190",
44
+ "32": "T1204.002",
45
+ "33": "T1210",
46
+ "34": "T1218.011",
47
+ "35": "T1219",
48
+ "36": "T1484.001",
49
+ "37": "T1518.001",
50
+ "38": "T1543.003",
51
+ "39": "T1547.001",
52
+ "40": "T1548.002",
53
+ "41": "T1552.001",
54
+ "42": "T1557.001",
55
+ "43": "T1562.001",
56
+ "44": "T1564.001",
57
+ "45": "T1566.001",
58
+ "46": "T1569.002",
59
+ "47": "T1570",
60
+ "48": "T1573.001",
61
+ "49": "T1574.002"
62
+ },
63
+ "initializer_range": 0.02,
64
+ "intermediate_size": 3072,
65
+ "label2id": {
66
+ "T1003.001": 0,
67
+ "T1005": 1,
68
+ "T1012": 2,
69
+ "T1016": 3,
70
+ "T1021.001": 4,
71
+ "T1027": 5,
72
+ "T1033": 6,
73
+ "T1036.005": 7,
74
+ "T1041": 8,
75
+ "T1047": 9,
76
+ "T1053.005": 10,
77
+ "T1055": 11,
78
+ "T1056.001": 12,
79
+ "T1057": 13,
80
+ "T1059.003": 14,
81
+ "T1068": 15,
82
+ "T1070.004": 16,
83
+ "T1071.001": 17,
84
+ "T1072": 18,
85
+ "T1074.001": 19,
86
+ "T1078": 20,
87
+ "T1082": 21,
88
+ "T1083": 22,
89
+ "T1090": 23,
90
+ "T1095": 24,
91
+ "T1105": 25,
92
+ "T1106": 26,
93
+ "T1110": 27,
94
+ "T1112": 28,
95
+ "T1113": 29,
96
+ "T1140": 30,
97
+ "T1190": 31,
98
+ "T1204.002": 32,
99
+ "T1210": 33,
100
+ "T1218.011": 34,
101
+ "T1219": 35,
102
+ "T1484.001": 36,
103
+ "T1518.001": 37,
104
+ "T1543.003": 38,
105
+ "T1547.001": 39,
106
+ "T1548.002": 40,
107
+ "T1552.001": 41,
108
+ "T1557.001": 42,
109
+ "T1562.001": 43,
110
+ "T1564.001": 44,
111
+ "T1566.001": 45,
112
+ "T1569.002": 46,
113
+ "T1570": 47,
114
+ "T1573.001": 48,
115
+ "T1574.002": 49
116
+ },
117
+ "layer_norm_eps": 1e-12,
118
+ "max_position_embeddings": 512,
119
+ "model_type": "bert",
120
+ "num_attention_heads": 12,
121
+ "num_hidden_layers": 12,
122
+ "pad_token_id": 0,
123
+ "position_embedding_type": "absolute",
124
+ "problem_type": "multi_label_classification",
125
+ "torch_dtype": "float32",
126
+ "transformers_version": "4.55.2",
127
+ "type_vocab_size": 2,
128
+ "use_cache": true,
129
+ "vocab_size": 30522
130
+ }
checkpoint-1310/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b81c8dbe0e3adead24655482edd7d58307c489f4c1bde815e2113e8d2dd770a
3
+ size 438106296
checkpoint-1310/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:171fd87005860246597f433e3603180f023b81e8a0f2505e0e67c44c4199a053
3
+ size 876333626
checkpoint-1310/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c618c5811371b0a381feb8897d7e4c7aad74dfdb7876ef33d484a4665b373580
3
+ size 14244
checkpoint-1310/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
3
+ size 988
checkpoint-1310/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6295884dcc01b54e68ae3c77a924c4f2dca4c185dfcba84534f814af9eb23a30
3
+ size 1064
checkpoint-1310/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-1310/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1310/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-1310/trainer_state.json ADDED
@@ -0,0 +1,345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 917,
3
+ "best_metric": 0.4342857142857143,
4
+ "best_model_checkpoint": "./cysecbert-ttp-tram2_base_data/checkpoint-917",
5
+ "epoch": 10.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1310,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.3816793893129771,
14
+ "grad_norm": 36039.78125,
15
+ "learning_rate": 2.45e-05,
16
+ "loss": 0.5416,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.7633587786259542,
21
+ "grad_norm": 9505.58984375,
22
+ "learning_rate": 4.9500000000000004e-05,
23
+ "loss": 0.2038,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "eval_f1_macro": 0.0,
29
+ "eval_f1_micro": 0.0,
30
+ "eval_loss": 0.04570664092898369,
31
+ "eval_precision": 0.0,
32
+ "eval_recall": 0.0,
33
+ "eval_runtime": 10.7376,
34
+ "eval_samples_per_second": 355.759,
35
+ "eval_steps_per_second": 14.901,
36
+ "step": 131
37
+ },
38
+ {
39
+ "epoch": 1.1450381679389312,
40
+ "grad_norm": 9370.513671875,
41
+ "learning_rate": 4.981153846153847e-05,
42
+ "loss": 0.1094,
43
+ "step": 150
44
+ },
45
+ {
46
+ "epoch": 1.5267175572519083,
47
+ "grad_norm": 7157.39892578125,
48
+ "learning_rate": 4.961923076923077e-05,
49
+ "loss": 0.1075,
50
+ "step": 200
51
+ },
52
+ {
53
+ "epoch": 1.9083969465648853,
54
+ "grad_norm": 9901.6123046875,
55
+ "learning_rate": 4.942692307692308e-05,
56
+ "loss": 0.1026,
57
+ "step": 250
58
+ },
59
+ {
60
+ "epoch": 2.0,
61
+ "eval_f1_macro": 0.0,
62
+ "eval_f1_micro": 0.0,
63
+ "eval_loss": 0.04716825485229492,
64
+ "eval_precision": 0.0,
65
+ "eval_recall": 0.0,
66
+ "eval_runtime": 10.5515,
67
+ "eval_samples_per_second": 362.033,
68
+ "eval_steps_per_second": 15.164,
69
+ "step": 262
70
+ },
71
+ {
72
+ "epoch": 2.2900763358778624,
73
+ "grad_norm": 7338.14794921875,
74
+ "learning_rate": 4.923461538461539e-05,
75
+ "loss": 0.1003,
76
+ "step": 300
77
+ },
78
+ {
79
+ "epoch": 2.67175572519084,
80
+ "grad_norm": 10370.3720703125,
81
+ "learning_rate": 4.904230769230769e-05,
82
+ "loss": 0.0907,
83
+ "step": 350
84
+ },
85
+ {
86
+ "epoch": 3.0,
87
+ "eval_f1_macro": 0.04150333229523513,
88
+ "eval_f1_micro": 0.26657552973342447,
89
+ "eval_loss": 0.04108869284391403,
90
+ "eval_precision": 0.4431818181818182,
91
+ "eval_recall": 0.1906158357771261,
92
+ "eval_runtime": 10.6444,
93
+ "eval_samples_per_second": 358.876,
94
+ "eval_steps_per_second": 15.031,
95
+ "step": 393
96
+ },
97
+ {
98
+ "epoch": 3.053435114503817,
99
+ "grad_norm": 8239.685546875,
100
+ "learning_rate": 4.885e-05,
101
+ "loss": 0.0848,
102
+ "step": 400
103
+ },
104
+ {
105
+ "epoch": 3.435114503816794,
106
+ "grad_norm": 7363.1640625,
107
+ "learning_rate": 4.865769230769231e-05,
108
+ "loss": 0.074,
109
+ "step": 450
110
+ },
111
+ {
112
+ "epoch": 3.816793893129771,
113
+ "grad_norm": 10253.478515625,
114
+ "learning_rate": 4.8465384615384616e-05,
115
+ "loss": 0.0708,
116
+ "step": 500
117
+ },
118
+ {
119
+ "epoch": 4.0,
120
+ "eval_f1_macro": 0.0865137040218318,
121
+ "eval_f1_micro": 0.31753283837806967,
122
+ "eval_loss": 0.03653639927506447,
123
+ "eval_precision": 0.38186813186813184,
124
+ "eval_recall": 0.27174975562072334,
125
+ "eval_runtime": 10.3181,
126
+ "eval_samples_per_second": 370.223,
127
+ "eval_steps_per_second": 15.507,
128
+ "step": 524
129
+ },
130
+ {
131
+ "epoch": 4.198473282442748,
132
+ "grad_norm": 7632.10791015625,
133
+ "learning_rate": 4.827307692307693e-05,
134
+ "loss": 0.0632,
135
+ "step": 550
136
+ },
137
+ {
138
+ "epoch": 4.580152671755725,
139
+ "grad_norm": 9150.0556640625,
140
+ "learning_rate": 4.808076923076924e-05,
141
+ "loss": 0.0587,
142
+ "step": 600
143
+ },
144
+ {
145
+ "epoch": 4.961832061068702,
146
+ "grad_norm": 13003.1611328125,
147
+ "learning_rate": 4.788846153846154e-05,
148
+ "loss": 0.0536,
149
+ "step": 650
150
+ },
151
+ {
152
+ "epoch": 5.0,
153
+ "eval_f1_macro": 0.17183822511375405,
154
+ "eval_f1_micro": 0.33932302549101545,
155
+ "eval_loss": 0.03477426990866661,
156
+ "eval_precision": 0.29635036496350364,
157
+ "eval_recall": 0.396871945259042,
158
+ "eval_runtime": 10.2992,
159
+ "eval_samples_per_second": 370.901,
160
+ "eval_steps_per_second": 15.535,
161
+ "step": 655
162
+ },
163
+ {
164
+ "epoch": 5.34351145038168,
165
+ "grad_norm": 9582.8896484375,
166
+ "learning_rate": 4.7696153846153846e-05,
167
+ "loss": 0.0466,
168
+ "step": 700
169
+ },
170
+ {
171
+ "epoch": 5.7251908396946565,
172
+ "grad_norm": 10410.0546875,
173
+ "learning_rate": 4.750384615384616e-05,
174
+ "loss": 0.0427,
175
+ "step": 750
176
+ },
177
+ {
178
+ "epoch": 6.0,
179
+ "eval_f1_macro": 0.2823004051844873,
180
+ "eval_f1_micro": 0.4227574750830565,
181
+ "eval_loss": 0.03131383657455444,
182
+ "eval_precision": 0.3675090252707581,
183
+ "eval_recall": 0.4975562072336266,
184
+ "eval_runtime": 10.3222,
185
+ "eval_samples_per_second": 370.075,
186
+ "eval_steps_per_second": 15.501,
187
+ "step": 786
188
+ },
189
+ {
190
+ "epoch": 6.106870229007634,
191
+ "grad_norm": 8987.109375,
192
+ "learning_rate": 4.731153846153846e-05,
193
+ "loss": 0.0402,
194
+ "step": 800
195
+ },
196
+ {
197
+ "epoch": 6.488549618320611,
198
+ "grad_norm": 4473.130859375,
199
+ "learning_rate": 4.711923076923077e-05,
200
+ "loss": 0.0351,
201
+ "step": 850
202
+ },
203
+ {
204
+ "epoch": 6.870229007633588,
205
+ "grad_norm": 10426.6162109375,
206
+ "learning_rate": 4.692692307692308e-05,
207
+ "loss": 0.0337,
208
+ "step": 900
209
+ },
210
+ {
211
+ "epoch": 7.0,
212
+ "eval_f1_macro": 0.3363462665390104,
213
+ "eval_f1_micro": 0.4342857142857143,
214
+ "eval_loss": 0.03023417480289936,
215
+ "eval_precision": 0.35580524344569286,
216
+ "eval_recall": 0.5571847507331378,
217
+ "eval_runtime": 10.2654,
218
+ "eval_samples_per_second": 372.125,
219
+ "eval_steps_per_second": 15.586,
220
+ "step": 917
221
+ },
222
+ {
223
+ "epoch": 7.251908396946565,
224
+ "grad_norm": 8006.98779296875,
225
+ "learning_rate": 4.673461538461539e-05,
226
+ "loss": 0.0303,
227
+ "step": 950
228
+ },
229
+ {
230
+ "epoch": 7.633587786259542,
231
+ "grad_norm": 8583.732421875,
232
+ "learning_rate": 4.65423076923077e-05,
233
+ "loss": 0.0282,
234
+ "step": 1000
235
+ },
236
+ {
237
+ "epoch": 8.0,
238
+ "eval_f1_macro": 0.3390787375804454,
239
+ "eval_f1_micro": 0.41675248884311705,
240
+ "eval_loss": 0.031076261773705482,
241
+ "eval_precision": 0.32116402116402115,
242
+ "eval_recall": 0.5933528836754643,
243
+ "eval_runtime": 10.2543,
244
+ "eval_samples_per_second": 372.526,
245
+ "eval_steps_per_second": 15.603,
246
+ "step": 1048
247
+ },
248
+ {
249
+ "epoch": 8.01526717557252,
250
+ "grad_norm": 5588.6708984375,
251
+ "learning_rate": 4.635e-05,
252
+ "loss": 0.0281,
253
+ "step": 1050
254
+ },
255
+ {
256
+ "epoch": 8.396946564885496,
257
+ "grad_norm": 5759.66650390625,
258
+ "learning_rate": 4.6157692307692306e-05,
259
+ "loss": 0.0236,
260
+ "step": 1100
261
+ },
262
+ {
263
+ "epoch": 8.778625954198473,
264
+ "grad_norm": 7545.55810546875,
265
+ "learning_rate": 4.596538461538462e-05,
266
+ "loss": 0.0234,
267
+ "step": 1150
268
+ },
269
+ {
270
+ "epoch": 9.0,
271
+ "eval_f1_macro": 0.3290051589603852,
272
+ "eval_f1_micro": 0.42324983943481054,
273
+ "eval_loss": 0.03171388432383537,
274
+ "eval_precision": 0.3151602104256337,
275
+ "eval_recall": 0.6441837732160313,
276
+ "eval_runtime": 10.2276,
277
+ "eval_samples_per_second": 373.498,
278
+ "eval_steps_per_second": 15.644,
279
+ "step": 1179
280
+ },
281
+ {
282
+ "epoch": 9.16030534351145,
283
+ "grad_norm": 5176.86767578125,
284
+ "learning_rate": 4.577307692307692e-05,
285
+ "loss": 0.0217,
286
+ "step": 1200
287
+ },
288
+ {
289
+ "epoch": 9.541984732824428,
290
+ "grad_norm": 5463.861328125,
291
+ "learning_rate": 4.558076923076923e-05,
292
+ "loss": 0.0198,
293
+ "step": 1250
294
+ },
295
+ {
296
+ "epoch": 9.923664122137405,
297
+ "grad_norm": 2985.81591796875,
298
+ "learning_rate": 4.538846153846154e-05,
299
+ "loss": 0.0183,
300
+ "step": 1300
301
+ },
302
+ {
303
+ "epoch": 10.0,
304
+ "eval_f1_macro": 0.33764633344698114,
305
+ "eval_f1_micro": 0.4048391856004721,
306
+ "eval_loss": 0.033043116331100464,
307
+ "eval_precision": 0.28994082840236685,
308
+ "eval_recall": 0.6705767350928641,
309
+ "eval_runtime": 10.6264,
310
+ "eval_samples_per_second": 359.483,
311
+ "eval_steps_per_second": 15.057,
312
+ "step": 1310
313
+ }
314
+ ],
315
+ "logging_steps": 50,
316
+ "max_steps": 13100,
317
+ "num_input_tokens_seen": 0,
318
+ "num_train_epochs": 100,
319
+ "save_steps": 500,
320
+ "stateful_callbacks": {
321
+ "EarlyStoppingCallback": {
322
+ "args": {
323
+ "early_stopping_patience": 3,
324
+ "early_stopping_threshold": 0.0
325
+ },
326
+ "attributes": {
327
+ "early_stopping_patience_counter": 3
328
+ }
329
+ },
330
+ "TrainerControl": {
331
+ "args": {
332
+ "should_epoch_stop": false,
333
+ "should_evaluate": false,
334
+ "should_log": false,
335
+ "should_save": true,
336
+ "should_training_stop": true
337
+ },
338
+ "attributes": {}
339
+ }
340
+ },
341
+ "total_flos": 8236293009960960.0,
342
+ "train_batch_size": 24,
343
+ "trial_name": null,
344
+ "trial_params": null
345
+ }
checkpoint-1310/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6aee2fca7ff2c999f9ae67a083c9553706cd4b3e2f09a64c6ebac2a0bd6a82c0
3
+ size 5368
checkpoint-1310/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-262/config.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "T1003.001",
13
+ "1": "T1005",
14
+ "2": "T1012",
15
+ "3": "T1016",
16
+ "4": "T1021.001",
17
+ "5": "T1027",
18
+ "6": "T1033",
19
+ "7": "T1036.005",
20
+ "8": "T1041",
21
+ "9": "T1047",
22
+ "10": "T1053.005",
23
+ "11": "T1055",
24
+ "12": "T1056.001",
25
+ "13": "T1057",
26
+ "14": "T1059.003",
27
+ "15": "T1068",
28
+ "16": "T1070.004",
29
+ "17": "T1071.001",
30
+ "18": "T1072",
31
+ "19": "T1074.001",
32
+ "20": "T1078",
33
+ "21": "T1082",
34
+ "22": "T1083",
35
+ "23": "T1090",
36
+ "24": "T1095",
37
+ "25": "T1105",
38
+ "26": "T1106",
39
+ "27": "T1110",
40
+ "28": "T1112",
41
+ "29": "T1113",
42
+ "30": "T1140",
43
+ "31": "T1190",
44
+ "32": "T1204.002",
45
+ "33": "T1210",
46
+ "34": "T1218.011",
47
+ "35": "T1219",
48
+ "36": "T1484.001",
49
+ "37": "T1518.001",
50
+ "38": "T1543.003",
51
+ "39": "T1547.001",
52
+ "40": "T1548.002",
53
+ "41": "T1552.001",
54
+ "42": "T1557.001",
55
+ "43": "T1562.001",
56
+ "44": "T1564.001",
57
+ "45": "T1566.001",
58
+ "46": "T1569.002",
59
+ "47": "T1570",
60
+ "48": "T1573.001",
61
+ "49": "T1574.002"
62
+ },
63
+ "initializer_range": 0.02,
64
+ "intermediate_size": 3072,
65
+ "label2id": {
66
+ "T1003.001": 0,
67
+ "T1005": 1,
68
+ "T1012": 2,
69
+ "T1016": 3,
70
+ "T1021.001": 4,
71
+ "T1027": 5,
72
+ "T1033": 6,
73
+ "T1036.005": 7,
74
+ "T1041": 8,
75
+ "T1047": 9,
76
+ "T1053.005": 10,
77
+ "T1055": 11,
78
+ "T1056.001": 12,
79
+ "T1057": 13,
80
+ "T1059.003": 14,
81
+ "T1068": 15,
82
+ "T1070.004": 16,
83
+ "T1071.001": 17,
84
+ "T1072": 18,
85
+ "T1074.001": 19,
86
+ "T1078": 20,
87
+ "T1082": 21,
88
+ "T1083": 22,
89
+ "T1090": 23,
90
+ "T1095": 24,
91
+ "T1105": 25,
92
+ "T1106": 26,
93
+ "T1110": 27,
94
+ "T1112": 28,
95
+ "T1113": 29,
96
+ "T1140": 30,
97
+ "T1190": 31,
98
+ "T1204.002": 32,
99
+ "T1210": 33,
100
+ "T1218.011": 34,
101
+ "T1219": 35,
102
+ "T1484.001": 36,
103
+ "T1518.001": 37,
104
+ "T1543.003": 38,
105
+ "T1547.001": 39,
106
+ "T1548.002": 40,
107
+ "T1552.001": 41,
108
+ "T1557.001": 42,
109
+ "T1562.001": 43,
110
+ "T1564.001": 44,
111
+ "T1566.001": 45,
112
+ "T1569.002": 46,
113
+ "T1570": 47,
114
+ "T1573.001": 48,
115
+ "T1574.002": 49
116
+ },
117
+ "layer_norm_eps": 1e-12,
118
+ "max_position_embeddings": 512,
119
+ "model_type": "bert",
120
+ "num_attention_heads": 12,
121
+ "num_hidden_layers": 12,
122
+ "pad_token_id": 0,
123
+ "position_embedding_type": "absolute",
124
+ "problem_type": "multi_label_classification",
125
+ "torch_dtype": "float32",
126
+ "transformers_version": "4.55.2",
127
+ "type_vocab_size": 2,
128
+ "use_cache": true,
129
+ "vocab_size": 30522
130
+ }
checkpoint-262/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2466a3e5355848fc410d9a9d3af85a5e20a977e5c5d89e0d5e2173a6a40a47a4
3
+ size 438106296