CaffeineThief commited on
Commit
a3c73e7
·
verified ·
1 Parent(s): 7f4dd2a

Upload saved model files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. checkpoint-104/config.json +152 -0
  2. checkpoint-104/model.safetensors +3 -0
  3. checkpoint-104/optimizer.pt +3 -0
  4. checkpoint-104/rng_state.pth +3 -0
  5. checkpoint-104/scaler.pt +3 -0
  6. checkpoint-104/scheduler.pt +3 -0
  7. checkpoint-104/special_tokens_map.json +37 -0
  8. checkpoint-104/tokenizer.json +0 -0
  9. checkpoint-104/tokenizer_config.json +56 -0
  10. checkpoint-104/trainer_state.json +81 -0
  11. checkpoint-104/training_args.bin +3 -0
  12. checkpoint-104/vocab.txt +0 -0
  13. checkpoint-1040/config.json +152 -0
  14. checkpoint-1040/model.safetensors +3 -0
  15. checkpoint-1040/optimizer.pt +3 -0
  16. checkpoint-1040/rng_state.pth +3 -0
  17. checkpoint-1040/scaler.pt +3 -0
  18. checkpoint-1040/scheduler.pt +3 -0
  19. checkpoint-1040/special_tokens_map.json +37 -0
  20. checkpoint-1040/tokenizer.json +0 -0
  21. checkpoint-1040/tokenizer_config.json +56 -0
  22. checkpoint-1040/trainer_state.json +423 -0
  23. checkpoint-1040/training_args.bin +3 -0
  24. checkpoint-1040/vocab.txt +0 -0
  25. checkpoint-1092/config.json +152 -0
  26. checkpoint-1092/model.safetensors +3 -0
  27. checkpoint-1092/optimizer.pt +3 -0
  28. checkpoint-1092/rng_state.pth +3 -0
  29. checkpoint-1092/scaler.pt +3 -0
  30. checkpoint-1092/scheduler.pt +3 -0
  31. checkpoint-1092/special_tokens_map.json +37 -0
  32. checkpoint-1092/tokenizer.json +0 -0
  33. checkpoint-1092/tokenizer_config.json +56 -0
  34. checkpoint-1092/trainer_state.json +442 -0
  35. checkpoint-1092/training_args.bin +3 -0
  36. checkpoint-1092/vocab.txt +0 -0
  37. checkpoint-1144/config.json +152 -0
  38. checkpoint-1144/model.safetensors +3 -0
  39. checkpoint-1144/optimizer.pt +3 -0
  40. checkpoint-1144/rng_state.pth +3 -0
  41. checkpoint-1144/scaler.pt +3 -0
  42. checkpoint-1144/scheduler.pt +3 -0
  43. checkpoint-1144/special_tokens_map.json +37 -0
  44. checkpoint-1144/tokenizer.json +0 -0
  45. checkpoint-1144/tokenizer_config.json +56 -0
  46. checkpoint-1144/trainer_state.json +461 -0
  47. checkpoint-1144/training_args.bin +3 -0
  48. checkpoint-1144/vocab.txt +0 -0
  49. checkpoint-1196/config.json +152 -0
  50. checkpoint-1196/model.safetensors +3 -0
checkpoint-104/config.json ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "T1005",
13
+ "1": "T1021",
14
+ "2": "T1027",
15
+ "3": "T1033",
16
+ "4": "T1036",
17
+ "5": "T1041",
18
+ "6": "T1046",
19
+ "7": "T1048",
20
+ "8": "T1049",
21
+ "9": "T1053",
22
+ "10": "T1055",
23
+ "11": "T1056",
24
+ "12": "T1057",
25
+ "13": "T1059",
26
+ "14": "T1070",
27
+ "15": "T1071",
28
+ "16": "T1074",
29
+ "17": "T1078",
30
+ "18": "T1082",
31
+ "19": "T1083",
32
+ "20": "T1098",
33
+ "21": "T1102",
34
+ "22": "T1105",
35
+ "23": "T1110",
36
+ "24": "T1113",
37
+ "25": "T1114",
38
+ "26": "T1115",
39
+ "27": "T1132",
40
+ "28": "T1137",
41
+ "29": "T1140",
42
+ "30": "T1189",
43
+ "31": "T1190",
44
+ "32": "T1195",
45
+ "33": "T1203",
46
+ "34": "T1204",
47
+ "35": "T1218",
48
+ "36": "T1486",
49
+ "37": "T1491",
50
+ "38": "T1496",
51
+ "39": "T1497",
52
+ "40": "T1499",
53
+ "41": "T1528",
54
+ "42": "T1539",
55
+ "43": "T1547",
56
+ "44": "T1555",
57
+ "45": "T1557",
58
+ "46": "T1562",
59
+ "47": "T1564",
60
+ "48": "T1566",
61
+ "49": "T1567",
62
+ "50": "T1573",
63
+ "51": "T1574",
64
+ "52": "T1583",
65
+ "53": "T1586",
66
+ "54": "T1589",
67
+ "55": "T1606",
68
+ "56": "T1608",
69
+ "57": "T1614",
70
+ "58": "T1620",
71
+ "59": "T1623.001",
72
+ "60": "T1631.001"
73
+ },
74
+ "initializer_range": 0.02,
75
+ "intermediate_size": 3072,
76
+ "label2id": {
77
+ "T1005": 0,
78
+ "T1021": 1,
79
+ "T1027": 2,
80
+ "T1033": 3,
81
+ "T1036": 4,
82
+ "T1041": 5,
83
+ "T1046": 6,
84
+ "T1048": 7,
85
+ "T1049": 8,
86
+ "T1053": 9,
87
+ "T1055": 10,
88
+ "T1056": 11,
89
+ "T1057": 12,
90
+ "T1059": 13,
91
+ "T1070": 14,
92
+ "T1071": 15,
93
+ "T1074": 16,
94
+ "T1078": 17,
95
+ "T1082": 18,
96
+ "T1083": 19,
97
+ "T1098": 20,
98
+ "T1102": 21,
99
+ "T1105": 22,
100
+ "T1110": 23,
101
+ "T1113": 24,
102
+ "T1114": 25,
103
+ "T1115": 26,
104
+ "T1132": 27,
105
+ "T1137": 28,
106
+ "T1140": 29,
107
+ "T1189": 30,
108
+ "T1190": 31,
109
+ "T1195": 32,
110
+ "T1203": 33,
111
+ "T1204": 34,
112
+ "T1218": 35,
113
+ "T1486": 36,
114
+ "T1491": 37,
115
+ "T1496": 38,
116
+ "T1497": 39,
117
+ "T1499": 40,
118
+ "T1528": 41,
119
+ "T1539": 42,
120
+ "T1547": 43,
121
+ "T1555": 44,
122
+ "T1557": 45,
123
+ "T1562": 46,
124
+ "T1564": 47,
125
+ "T1566": 48,
126
+ "T1567": 49,
127
+ "T1573": 50,
128
+ "T1574": 51,
129
+ "T1583": 52,
130
+ "T1586": 53,
131
+ "T1589": 54,
132
+ "T1606": 55,
133
+ "T1608": 56,
134
+ "T1614": 57,
135
+ "T1620": 58,
136
+ "T1623.001": 59,
137
+ "T1631.001": 60
138
+ },
139
+ "layer_norm_eps": 1e-12,
140
+ "max_position_embeddings": 512,
141
+ "model_type": "bert",
142
+ "num_attention_heads": 12,
143
+ "num_hidden_layers": 12,
144
+ "pad_token_id": 0,
145
+ "position_embedding_type": "absolute",
146
+ "problem_type": "multi_label_classification",
147
+ "torch_dtype": "float32",
148
+ "transformers_version": "4.55.2",
149
+ "type_vocab_size": 2,
150
+ "use_cache": true,
151
+ "vocab_size": 30522
152
+ }
checkpoint-104/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7df2fe92b1e0876a7c8d36194c053ae9bcbfd001ece2aa27e0155ad80d63eb4f
3
+ size 438140132
checkpoint-104/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b6fc31b3eb14051342e97bf41918a5fdf23de8ad17d018cc2b365fee6382a97
3
+ size 876401210
checkpoint-104/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:736858f0ba9663f7ef1e0de90df5597ae3a880c27538fc5eb96e2cb1485eb4e5
3
+ size 14244
checkpoint-104/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
3
+ size 988
checkpoint-104/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:279117f7d4cc0f14fd1502f1e503dda0c2168ebe4feb368500cd5598d6b2c574
3
+ size 1064
checkpoint-104/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-104/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-104/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-104/trainer_state.json ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 52,
3
+ "best_metric": 0.0,
4
+ "best_model_checkpoint": "./cysecbert-ttp-annoctr_step2/checkpoint-52",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 104,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.9615384615384616,
14
+ "grad_norm": 35970.37109375,
15
+ "learning_rate": 2.45e-05,
16
+ "loss": 0.5612,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_f1_macro": 0.0,
22
+ "eval_f1_micro": 0.0,
23
+ "eval_loss": 0.33502161502838135,
24
+ "eval_precision": 0.0,
25
+ "eval_recall": 0.0,
26
+ "eval_runtime": 1.6495,
27
+ "eval_samples_per_second": 375.276,
28
+ "eval_steps_per_second": 15.763,
29
+ "step": 52
30
+ },
31
+ {
32
+ "epoch": 1.9230769230769231,
33
+ "grad_norm": 10958.1689453125,
34
+ "learning_rate": 4.9500000000000004e-05,
35
+ "loss": 0.214,
36
+ "step": 100
37
+ },
38
+ {
39
+ "epoch": 2.0,
40
+ "eval_f1_macro": 0.0,
41
+ "eval_f1_micro": 0.0,
42
+ "eval_loss": 0.09798076748847961,
43
+ "eval_precision": 0.0,
44
+ "eval_recall": 0.0,
45
+ "eval_runtime": 1.6513,
46
+ "eval_samples_per_second": 374.861,
47
+ "eval_steps_per_second": 15.745,
48
+ "step": 104
49
+ }
50
+ ],
51
+ "logging_steps": 50,
52
+ "max_steps": 5200,
53
+ "num_input_tokens_seen": 0,
54
+ "num_train_epochs": 100,
55
+ "save_steps": 500,
56
+ "stateful_callbacks": {
57
+ "EarlyStoppingCallback": {
58
+ "args": {
59
+ "early_stopping_patience": 5,
60
+ "early_stopping_threshold": 0.0
61
+ },
62
+ "attributes": {
63
+ "early_stopping_patience_counter": 1
64
+ }
65
+ },
66
+ "TrainerControl": {
67
+ "args": {
68
+ "should_epoch_stop": false,
69
+ "should_evaluate": false,
70
+ "should_log": false,
71
+ "should_save": true,
72
+ "should_training_stop": false
73
+ },
74
+ "attributes": {}
75
+ }
76
+ },
77
+ "total_flos": 653914080718848.0,
78
+ "train_batch_size": 24,
79
+ "trial_name": null,
80
+ "trial_params": null
81
+ }
checkpoint-104/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f05e8606862008bfc17115034db9429cc42bab3677cf65b2b782cae0ed9dfed
3
+ size 5368
checkpoint-104/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1040/config.json ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "T1005",
13
+ "1": "T1021",
14
+ "2": "T1027",
15
+ "3": "T1033",
16
+ "4": "T1036",
17
+ "5": "T1041",
18
+ "6": "T1046",
19
+ "7": "T1048",
20
+ "8": "T1049",
21
+ "9": "T1053",
22
+ "10": "T1055",
23
+ "11": "T1056",
24
+ "12": "T1057",
25
+ "13": "T1059",
26
+ "14": "T1070",
27
+ "15": "T1071",
28
+ "16": "T1074",
29
+ "17": "T1078",
30
+ "18": "T1082",
31
+ "19": "T1083",
32
+ "20": "T1098",
33
+ "21": "T1102",
34
+ "22": "T1105",
35
+ "23": "T1110",
36
+ "24": "T1113",
37
+ "25": "T1114",
38
+ "26": "T1115",
39
+ "27": "T1132",
40
+ "28": "T1137",
41
+ "29": "T1140",
42
+ "30": "T1189",
43
+ "31": "T1190",
44
+ "32": "T1195",
45
+ "33": "T1203",
46
+ "34": "T1204",
47
+ "35": "T1218",
48
+ "36": "T1486",
49
+ "37": "T1491",
50
+ "38": "T1496",
51
+ "39": "T1497",
52
+ "40": "T1499",
53
+ "41": "T1528",
54
+ "42": "T1539",
55
+ "43": "T1547",
56
+ "44": "T1555",
57
+ "45": "T1557",
58
+ "46": "T1562",
59
+ "47": "T1564",
60
+ "48": "T1566",
61
+ "49": "T1567",
62
+ "50": "T1573",
63
+ "51": "T1574",
64
+ "52": "T1583",
65
+ "53": "T1586",
66
+ "54": "T1589",
67
+ "55": "T1606",
68
+ "56": "T1608",
69
+ "57": "T1614",
70
+ "58": "T1620",
71
+ "59": "T1623.001",
72
+ "60": "T1631.001"
73
+ },
74
+ "initializer_range": 0.02,
75
+ "intermediate_size": 3072,
76
+ "label2id": {
77
+ "T1005": 0,
78
+ "T1021": 1,
79
+ "T1027": 2,
80
+ "T1033": 3,
81
+ "T1036": 4,
82
+ "T1041": 5,
83
+ "T1046": 6,
84
+ "T1048": 7,
85
+ "T1049": 8,
86
+ "T1053": 9,
87
+ "T1055": 10,
88
+ "T1056": 11,
89
+ "T1057": 12,
90
+ "T1059": 13,
91
+ "T1070": 14,
92
+ "T1071": 15,
93
+ "T1074": 16,
94
+ "T1078": 17,
95
+ "T1082": 18,
96
+ "T1083": 19,
97
+ "T1098": 20,
98
+ "T1102": 21,
99
+ "T1105": 22,
100
+ "T1110": 23,
101
+ "T1113": 24,
102
+ "T1114": 25,
103
+ "T1115": 26,
104
+ "T1132": 27,
105
+ "T1137": 28,
106
+ "T1140": 29,
107
+ "T1189": 30,
108
+ "T1190": 31,
109
+ "T1195": 32,
110
+ "T1203": 33,
111
+ "T1204": 34,
112
+ "T1218": 35,
113
+ "T1486": 36,
114
+ "T1491": 37,
115
+ "T1496": 38,
116
+ "T1497": 39,
117
+ "T1499": 40,
118
+ "T1528": 41,
119
+ "T1539": 42,
120
+ "T1547": 43,
121
+ "T1555": 44,
122
+ "T1557": 45,
123
+ "T1562": 46,
124
+ "T1564": 47,
125
+ "T1566": 48,
126
+ "T1567": 49,
127
+ "T1573": 50,
128
+ "T1574": 51,
129
+ "T1583": 52,
130
+ "T1586": 53,
131
+ "T1589": 54,
132
+ "T1606": 55,
133
+ "T1608": 56,
134
+ "T1614": 57,
135
+ "T1620": 58,
136
+ "T1623.001": 59,
137
+ "T1631.001": 60
138
+ },
139
+ "layer_norm_eps": 1e-12,
140
+ "max_position_embeddings": 512,
141
+ "model_type": "bert",
142
+ "num_attention_heads": 12,
143
+ "num_hidden_layers": 12,
144
+ "pad_token_id": 0,
145
+ "position_embedding_type": "absolute",
146
+ "problem_type": "multi_label_classification",
147
+ "torch_dtype": "float32",
148
+ "transformers_version": "4.55.2",
149
+ "type_vocab_size": 2,
150
+ "use_cache": true,
151
+ "vocab_size": 30522
152
+ }
checkpoint-1040/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b896e079d9638f588e023e01f960378b8d230266d7f5f4deea75180d814f7944
3
+ size 438140132
checkpoint-1040/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:933df3f27a3b805d3c573fb31e6305a70b24efec69367e894738e2f1ce088a77
3
+ size 876401210
checkpoint-1040/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ace5adf5b6afced75a55131c69efa38017a45543db99fff32a13305899ced141
3
+ size 14244
checkpoint-1040/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
3
+ size 988
checkpoint-1040/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dd306eccedd3d701a952a9eb783d3bc211c2cad0601f18a968e26cc19d0363b
3
+ size 1064
checkpoint-1040/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-1040/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1040/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-1040/trainer_state.json ADDED
@@ -0,0 +1,423 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 988,
3
+ "best_metric": 0.6585365853658537,
4
+ "best_model_checkpoint": "./cysecbert-ttp-annoctr_step2/checkpoint-988",
5
+ "epoch": 20.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1040,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.9615384615384616,
14
+ "grad_norm": 35970.37109375,
15
+ "learning_rate": 2.45e-05,
16
+ "loss": 0.5612,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_f1_macro": 0.0,
22
+ "eval_f1_micro": 0.0,
23
+ "eval_loss": 0.33502161502838135,
24
+ "eval_precision": 0.0,
25
+ "eval_recall": 0.0,
26
+ "eval_runtime": 1.6495,
27
+ "eval_samples_per_second": 375.276,
28
+ "eval_steps_per_second": 15.763,
29
+ "step": 52
30
+ },
31
+ {
32
+ "epoch": 1.9230769230769231,
33
+ "grad_norm": 10958.1689453125,
34
+ "learning_rate": 4.9500000000000004e-05,
35
+ "loss": 0.214,
36
+ "step": 100
37
+ },
38
+ {
39
+ "epoch": 2.0,
40
+ "eval_f1_macro": 0.0,
41
+ "eval_f1_micro": 0.0,
42
+ "eval_loss": 0.09798076748847961,
43
+ "eval_precision": 0.0,
44
+ "eval_recall": 0.0,
45
+ "eval_runtime": 1.6513,
46
+ "eval_samples_per_second": 374.861,
47
+ "eval_steps_per_second": 15.745,
48
+ "step": 104
49
+ },
50
+ {
51
+ "epoch": 2.8846153846153846,
52
+ "grad_norm": 5634.60546875,
53
+ "learning_rate": 4.951960784313726e-05,
54
+ "loss": 0.0873,
55
+ "step": 150
56
+ },
57
+ {
58
+ "epoch": 3.0,
59
+ "eval_f1_macro": 0.0,
60
+ "eval_f1_micro": 0.0,
61
+ "eval_loss": 0.07998213171958923,
62
+ "eval_precision": 0.0,
63
+ "eval_recall": 0.0,
64
+ "eval_runtime": 1.6587,
65
+ "eval_samples_per_second": 373.195,
66
+ "eval_steps_per_second": 15.675,
67
+ "step": 156
68
+ },
69
+ {
70
+ "epoch": 3.8461538461538463,
71
+ "grad_norm": 6965.6220703125,
72
+ "learning_rate": 4.9029411764705883e-05,
73
+ "loss": 0.0791,
74
+ "step": 200
75
+ },
76
+ {
77
+ "epoch": 4.0,
78
+ "eval_f1_macro": 0.0,
79
+ "eval_f1_micro": 0.0,
80
+ "eval_loss": 0.07983831316232681,
81
+ "eval_precision": 0.0,
82
+ "eval_recall": 0.0,
83
+ "eval_runtime": 1.6476,
84
+ "eval_samples_per_second": 375.688,
85
+ "eval_steps_per_second": 15.78,
86
+ "step": 208
87
+ },
88
+ {
89
+ "epoch": 4.8076923076923075,
90
+ "grad_norm": 4831.40478515625,
91
+ "learning_rate": 4.8539215686274515e-05,
92
+ "loss": 0.0777,
93
+ "step": 250
94
+ },
95
+ {
96
+ "epoch": 5.0,
97
+ "eval_f1_macro": 0.0,
98
+ "eval_f1_micro": 0.0,
99
+ "eval_loss": 0.07502060383558273,
100
+ "eval_precision": 0.0,
101
+ "eval_recall": 0.0,
102
+ "eval_runtime": 1.6796,
103
+ "eval_samples_per_second": 368.535,
104
+ "eval_steps_per_second": 15.48,
105
+ "step": 260
106
+ },
107
+ {
108
+ "epoch": 5.769230769230769,
109
+ "grad_norm": 7363.94580078125,
110
+ "learning_rate": 4.804901960784314e-05,
111
+ "loss": 0.0716,
112
+ "step": 300
113
+ },
114
+ {
115
+ "epoch": 6.0,
116
+ "eval_f1_macro": 0.038923099933562195,
117
+ "eval_f1_micro": 0.47416413373860183,
118
+ "eval_loss": 0.0661635547876358,
119
+ "eval_precision": 0.9069767441860465,
120
+ "eval_recall": 0.32098765432098764,
121
+ "eval_runtime": 1.6554,
122
+ "eval_samples_per_second": 373.937,
123
+ "eval_steps_per_second": 15.707,
124
+ "step": 312
125
+ },
126
+ {
127
+ "epoch": 6.730769230769231,
128
+ "grad_norm": 6673.61376953125,
129
+ "learning_rate": 4.7558823529411766e-05,
130
+ "loss": 0.0612,
131
+ "step": 350
132
+ },
133
+ {
134
+ "epoch": 7.0,
135
+ "eval_f1_macro": 0.04364161045234521,
136
+ "eval_f1_micro": 0.502835538752363,
137
+ "eval_loss": 0.061204444617033005,
138
+ "eval_precision": 0.8085106382978723,
139
+ "eval_recall": 0.36488340192043894,
140
+ "eval_runtime": 1.6748,
141
+ "eval_samples_per_second": 369.593,
142
+ "eval_steps_per_second": 15.524,
143
+ "step": 364
144
+ },
145
+ {
146
+ "epoch": 7.6923076923076925,
147
+ "grad_norm": 6426.9541015625,
148
+ "learning_rate": 4.70686274509804e-05,
149
+ "loss": 0.0526,
150
+ "step": 400
151
+ },
152
+ {
153
+ "epoch": 8.0,
154
+ "eval_f1_macro": 0.07288718124374782,
155
+ "eval_f1_micro": 0.5546522131887985,
156
+ "eval_loss": 0.05695081874728203,
157
+ "eval_precision": 0.8121693121693122,
158
+ "eval_recall": 0.42112482853223593,
159
+ "eval_runtime": 1.665,
160
+ "eval_samples_per_second": 371.782,
161
+ "eval_steps_per_second": 15.616,
162
+ "step": 416
163
+ },
164
+ {
165
+ "epoch": 8.653846153846153,
166
+ "grad_norm": 9758.2529296875,
167
+ "learning_rate": 4.6578431372549016e-05,
168
+ "loss": 0.0433,
169
+ "step": 450
170
+ },
171
+ {
172
+ "epoch": 9.0,
173
+ "eval_f1_macro": 0.07471540750966602,
174
+ "eval_f1_micro": 0.545950864422202,
175
+ "eval_loss": 0.05365221947431564,
176
+ "eval_precision": 0.8108108108108109,
177
+ "eval_recall": 0.411522633744856,
178
+ "eval_runtime": 1.6749,
179
+ "eval_samples_per_second": 369.564,
180
+ "eval_steps_per_second": 15.523,
181
+ "step": 468
182
+ },
183
+ {
184
+ "epoch": 9.615384615384615,
185
+ "grad_norm": 3793.51953125,
186
+ "learning_rate": 4.608823529411765e-05,
187
+ "loss": 0.0397,
188
+ "step": 500
189
+ },
190
+ {
191
+ "epoch": 10.0,
192
+ "eval_f1_macro": 0.12734138202225653,
193
+ "eval_f1_micro": 0.5939086294416244,
194
+ "eval_loss": 0.052679501473903656,
195
+ "eval_precision": 0.7748344370860927,
196
+ "eval_recall": 0.48148148148148145,
197
+ "eval_runtime": 1.6561,
198
+ "eval_samples_per_second": 373.762,
199
+ "eval_steps_per_second": 15.699,
200
+ "step": 520
201
+ },
202
+ {
203
+ "epoch": 10.576923076923077,
204
+ "grad_norm": 4446.76611328125,
205
+ "learning_rate": 4.559803921568628e-05,
206
+ "loss": 0.0329,
207
+ "step": 550
208
+ },
209
+ {
210
+ "epoch": 11.0,
211
+ "eval_f1_macro": 0.1493812021209619,
212
+ "eval_f1_micro": 0.609735269000854,
213
+ "eval_loss": 0.05011816695332527,
214
+ "eval_precision": 0.8076923076923077,
215
+ "eval_recall": 0.4897119341563786,
216
+ "eval_runtime": 1.6806,
217
+ "eval_samples_per_second": 368.324,
218
+ "eval_steps_per_second": 15.471,
219
+ "step": 572
220
+ },
221
+ {
222
+ "epoch": 11.538461538461538,
223
+ "grad_norm": 5606.7880859375,
224
+ "learning_rate": 4.51078431372549e-05,
225
+ "loss": 0.0286,
226
+ "step": 600
227
+ },
228
+ {
229
+ "epoch": 12.0,
230
+ "eval_f1_macro": 0.19655294907558352,
231
+ "eval_f1_micro": 0.6096959737058341,
232
+ "eval_loss": 0.05061562359333038,
233
+ "eval_precision": 0.7602459016393442,
234
+ "eval_recall": 0.5089163237311386,
235
+ "eval_runtime": 1.6713,
236
+ "eval_samples_per_second": 370.373,
237
+ "eval_steps_per_second": 15.557,
238
+ "step": 624
239
+ },
240
+ {
241
+ "epoch": 12.5,
242
+ "grad_norm": 5415.85546875,
243
+ "learning_rate": 4.461764705882353e-05,
244
+ "loss": 0.0254,
245
+ "step": 650
246
+ },
247
+ {
248
+ "epoch": 13.0,
249
+ "eval_f1_macro": 0.20270281237441773,
250
+ "eval_f1_micro": 0.6230831315577078,
251
+ "eval_loss": 0.04895725101232529,
252
+ "eval_precision": 0.7568627450980392,
253
+ "eval_recall": 0.5294924554183813,
254
+ "eval_runtime": 2.2359,
255
+ "eval_samples_per_second": 276.851,
256
+ "eval_steps_per_second": 11.629,
257
+ "step": 676
258
+ },
259
+ {
260
+ "epoch": 13.461538461538462,
261
+ "grad_norm": 6214.2744140625,
262
+ "learning_rate": 4.412745098039216e-05,
263
+ "loss": 0.023,
264
+ "step": 700
265
+ },
266
+ {
267
+ "epoch": 14.0,
268
+ "eval_f1_macro": 0.21593787964288247,
269
+ "eval_f1_micro": 0.6309904153354633,
270
+ "eval_loss": 0.047714490443468094,
271
+ "eval_precision": 0.7552581261950286,
272
+ "eval_recall": 0.541838134430727,
273
+ "eval_runtime": 1.7054,
274
+ "eval_samples_per_second": 362.956,
275
+ "eval_steps_per_second": 15.245,
276
+ "step": 728
277
+ },
278
+ {
279
+ "epoch": 14.423076923076923,
280
+ "grad_norm": 5322.5205078125,
281
+ "learning_rate": 4.363725490196079e-05,
282
+ "loss": 0.0202,
283
+ "step": 750
284
+ },
285
+ {
286
+ "epoch": 15.0,
287
+ "eval_f1_macro": 0.2127885503653234,
288
+ "eval_f1_micro": 0.6297739672642245,
289
+ "eval_loss": 0.04875025525689125,
290
+ "eval_precision": 0.7292418772563177,
291
+ "eval_recall": 0.5541838134430727,
292
+ "eval_runtime": 1.6728,
293
+ "eval_samples_per_second": 370.029,
294
+ "eval_steps_per_second": 15.542,
295
+ "step": 780
296
+ },
297
+ {
298
+ "epoch": 15.384615384615385,
299
+ "grad_norm": 3209.152587890625,
300
+ "learning_rate": 4.3147058823529413e-05,
301
+ "loss": 0.0186,
302
+ "step": 800
303
+ },
304
+ {
305
+ "epoch": 16.0,
306
+ "eval_f1_macro": 0.2234013473846607,
307
+ "eval_f1_micro": 0.6476484194294526,
308
+ "eval_loss": 0.04788675159215927,
309
+ "eval_precision": 0.7394366197183099,
310
+ "eval_recall": 0.5761316872427984,
311
+ "eval_runtime": 1.6703,
312
+ "eval_samples_per_second": 370.591,
313
+ "eval_steps_per_second": 15.566,
314
+ "step": 832
315
+ },
316
+ {
317
+ "epoch": 16.346153846153847,
318
+ "grad_norm": 6189.79296875,
319
+ "learning_rate": 4.265686274509804e-05,
320
+ "loss": 0.0166,
321
+ "step": 850
322
+ },
323
+ {
324
+ "epoch": 17.0,
325
+ "eval_f1_macro": 0.2461245877407925,
326
+ "eval_f1_micro": 0.6392067124332571,
327
+ "eval_loss": 0.047994960099458694,
328
+ "eval_precision": 0.7199312714776632,
329
+ "eval_recall": 0.5747599451303155,
330
+ "eval_runtime": 1.6784,
331
+ "eval_samples_per_second": 368.8,
332
+ "eval_steps_per_second": 15.491,
333
+ "step": 884
334
+ },
335
+ {
336
+ "epoch": 17.307692307692307,
337
+ "grad_norm": 2584.114013671875,
338
+ "learning_rate": 4.216666666666667e-05,
339
+ "loss": 0.0148,
340
+ "step": 900
341
+ },
342
+ {
343
+ "epoch": 18.0,
344
+ "eval_f1_macro": 0.2672202679954545,
345
+ "eval_f1_micro": 0.6533742331288344,
346
+ "eval_loss": 0.04750063270330429,
347
+ "eval_precision": 0.7408695652173913,
348
+ "eval_recall": 0.5843621399176955,
349
+ "eval_runtime": 1.6744,
350
+ "eval_samples_per_second": 369.683,
351
+ "eval_steps_per_second": 15.528,
352
+ "step": 936
353
+ },
354
+ {
355
+ "epoch": 18.26923076923077,
356
+ "grad_norm": 2617.191650390625,
357
+ "learning_rate": 4.1676470588235296e-05,
358
+ "loss": 0.0128,
359
+ "step": 950
360
+ },
361
+ {
362
+ "epoch": 19.0,
363
+ "eval_f1_macro": 0.25478648592377506,
364
+ "eval_f1_micro": 0.6585365853658537,
365
+ "eval_loss": 0.04794125631451607,
366
+ "eval_precision": 0.7409948542024014,
367
+ "eval_recall": 0.5925925925925926,
368
+ "eval_runtime": 1.6751,
369
+ "eval_samples_per_second": 369.525,
370
+ "eval_steps_per_second": 15.521,
371
+ "step": 988
372
+ },
373
+ {
374
+ "epoch": 19.23076923076923,
375
+ "grad_norm": 2930.471923828125,
376
+ "learning_rate": 4.118627450980392e-05,
377
+ "loss": 0.0116,
378
+ "step": 1000
379
+ },
380
+ {
381
+ "epoch": 20.0,
382
+ "eval_f1_macro": 0.2563341548388003,
383
+ "eval_f1_micro": 0.6470143613000756,
384
+ "eval_loss": 0.04850601404905319,
385
+ "eval_precision": 0.7205387205387206,
386
+ "eval_recall": 0.5871056241426612,
387
+ "eval_runtime": 1.6688,
388
+ "eval_samples_per_second": 370.936,
389
+ "eval_steps_per_second": 15.58,
390
+ "step": 1040
391
+ }
392
+ ],
393
+ "logging_steps": 50,
394
+ "max_steps": 5200,
395
+ "num_input_tokens_seen": 0,
396
+ "num_train_epochs": 100,
397
+ "save_steps": 500,
398
+ "stateful_callbacks": {
399
+ "EarlyStoppingCallback": {
400
+ "args": {
401
+ "early_stopping_patience": 5,
402
+ "early_stopping_threshold": 0.0
403
+ },
404
+ "attributes": {
405
+ "early_stopping_patience_counter": 1
406
+ }
407
+ },
408
+ "TrainerControl": {
409
+ "args": {
410
+ "should_epoch_stop": false,
411
+ "should_evaluate": false,
412
+ "should_log": false,
413
+ "should_save": true,
414
+ "should_training_stop": false
415
+ },
416
+ "attributes": {}
417
+ }
418
+ },
419
+ "total_flos": 6539140807188480.0,
420
+ "train_batch_size": 24,
421
+ "trial_name": null,
422
+ "trial_params": null
423
+ }
checkpoint-1040/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f05e8606862008bfc17115034db9429cc42bab3677cf65b2b782cae0ed9dfed
3
+ size 5368
checkpoint-1040/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1092/config.json ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "T1005",
13
+ "1": "T1021",
14
+ "2": "T1027",
15
+ "3": "T1033",
16
+ "4": "T1036",
17
+ "5": "T1041",
18
+ "6": "T1046",
19
+ "7": "T1048",
20
+ "8": "T1049",
21
+ "9": "T1053",
22
+ "10": "T1055",
23
+ "11": "T1056",
24
+ "12": "T1057",
25
+ "13": "T1059",
26
+ "14": "T1070",
27
+ "15": "T1071",
28
+ "16": "T1074",
29
+ "17": "T1078",
30
+ "18": "T1082",
31
+ "19": "T1083",
32
+ "20": "T1098",
33
+ "21": "T1102",
34
+ "22": "T1105",
35
+ "23": "T1110",
36
+ "24": "T1113",
37
+ "25": "T1114",
38
+ "26": "T1115",
39
+ "27": "T1132",
40
+ "28": "T1137",
41
+ "29": "T1140",
42
+ "30": "T1189",
43
+ "31": "T1190",
44
+ "32": "T1195",
45
+ "33": "T1203",
46
+ "34": "T1204",
47
+ "35": "T1218",
48
+ "36": "T1486",
49
+ "37": "T1491",
50
+ "38": "T1496",
51
+ "39": "T1497",
52
+ "40": "T1499",
53
+ "41": "T1528",
54
+ "42": "T1539",
55
+ "43": "T1547",
56
+ "44": "T1555",
57
+ "45": "T1557",
58
+ "46": "T1562",
59
+ "47": "T1564",
60
+ "48": "T1566",
61
+ "49": "T1567",
62
+ "50": "T1573",
63
+ "51": "T1574",
64
+ "52": "T1583",
65
+ "53": "T1586",
66
+ "54": "T1589",
67
+ "55": "T1606",
68
+ "56": "T1608",
69
+ "57": "T1614",
70
+ "58": "T1620",
71
+ "59": "T1623.001",
72
+ "60": "T1631.001"
73
+ },
74
+ "initializer_range": 0.02,
75
+ "intermediate_size": 3072,
76
+ "label2id": {
77
+ "T1005": 0,
78
+ "T1021": 1,
79
+ "T1027": 2,
80
+ "T1033": 3,
81
+ "T1036": 4,
82
+ "T1041": 5,
83
+ "T1046": 6,
84
+ "T1048": 7,
85
+ "T1049": 8,
86
+ "T1053": 9,
87
+ "T1055": 10,
88
+ "T1056": 11,
89
+ "T1057": 12,
90
+ "T1059": 13,
91
+ "T1070": 14,
92
+ "T1071": 15,
93
+ "T1074": 16,
94
+ "T1078": 17,
95
+ "T1082": 18,
96
+ "T1083": 19,
97
+ "T1098": 20,
98
+ "T1102": 21,
99
+ "T1105": 22,
100
+ "T1110": 23,
101
+ "T1113": 24,
102
+ "T1114": 25,
103
+ "T1115": 26,
104
+ "T1132": 27,
105
+ "T1137": 28,
106
+ "T1140": 29,
107
+ "T1189": 30,
108
+ "T1190": 31,
109
+ "T1195": 32,
110
+ "T1203": 33,
111
+ "T1204": 34,
112
+ "T1218": 35,
113
+ "T1486": 36,
114
+ "T1491": 37,
115
+ "T1496": 38,
116
+ "T1497": 39,
117
+ "T1499": 40,
118
+ "T1528": 41,
119
+ "T1539": 42,
120
+ "T1547": 43,
121
+ "T1555": 44,
122
+ "T1557": 45,
123
+ "T1562": 46,
124
+ "T1564": 47,
125
+ "T1566": 48,
126
+ "T1567": 49,
127
+ "T1573": 50,
128
+ "T1574": 51,
129
+ "T1583": 52,
130
+ "T1586": 53,
131
+ "T1589": 54,
132
+ "T1606": 55,
133
+ "T1608": 56,
134
+ "T1614": 57,
135
+ "T1620": 58,
136
+ "T1623.001": 59,
137
+ "T1631.001": 60
138
+ },
139
+ "layer_norm_eps": 1e-12,
140
+ "max_position_embeddings": 512,
141
+ "model_type": "bert",
142
+ "num_attention_heads": 12,
143
+ "num_hidden_layers": 12,
144
+ "pad_token_id": 0,
145
+ "position_embedding_type": "absolute",
146
+ "problem_type": "multi_label_classification",
147
+ "torch_dtype": "float32",
148
+ "transformers_version": "4.55.2",
149
+ "type_vocab_size": 2,
150
+ "use_cache": true,
151
+ "vocab_size": 30522
152
+ }
checkpoint-1092/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9fb7b7785f54b70538249b6e13313b0908ac89988469530be141016d608dfcd
3
+ size 438140132
checkpoint-1092/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc910d9686defdb41ffd883291715f2b22578d048fb693839b20d4792359cac4
3
+ size 876401210
checkpoint-1092/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5988312761947fb69c8c90536e0665fe3c0ab245c725afc50794702d835f93b2
3
+ size 14244
checkpoint-1092/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
3
+ size 988
checkpoint-1092/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdd7f8b20fb7f9669142e0bbf4b736114d8316d103a75f1eb23facb1d1d7569f
3
+ size 1064
checkpoint-1092/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-1092/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1092/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-1092/trainer_state.json ADDED
@@ -0,0 +1,442 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 988,
3
+ "best_metric": 0.6585365853658537,
4
+ "best_model_checkpoint": "./cysecbert-ttp-annoctr_step2/checkpoint-988",
5
+ "epoch": 21.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1092,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.9615384615384616,
14
+ "grad_norm": 35970.37109375,
15
+ "learning_rate": 2.45e-05,
16
+ "loss": 0.5612,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_f1_macro": 0.0,
22
+ "eval_f1_micro": 0.0,
23
+ "eval_loss": 0.33502161502838135,
24
+ "eval_precision": 0.0,
25
+ "eval_recall": 0.0,
26
+ "eval_runtime": 1.6495,
27
+ "eval_samples_per_second": 375.276,
28
+ "eval_steps_per_second": 15.763,
29
+ "step": 52
30
+ },
31
+ {
32
+ "epoch": 1.9230769230769231,
33
+ "grad_norm": 10958.1689453125,
34
+ "learning_rate": 4.9500000000000004e-05,
35
+ "loss": 0.214,
36
+ "step": 100
37
+ },
38
+ {
39
+ "epoch": 2.0,
40
+ "eval_f1_macro": 0.0,
41
+ "eval_f1_micro": 0.0,
42
+ "eval_loss": 0.09798076748847961,
43
+ "eval_precision": 0.0,
44
+ "eval_recall": 0.0,
45
+ "eval_runtime": 1.6513,
46
+ "eval_samples_per_second": 374.861,
47
+ "eval_steps_per_second": 15.745,
48
+ "step": 104
49
+ },
50
+ {
51
+ "epoch": 2.8846153846153846,
52
+ "grad_norm": 5634.60546875,
53
+ "learning_rate": 4.951960784313726e-05,
54
+ "loss": 0.0873,
55
+ "step": 150
56
+ },
57
+ {
58
+ "epoch": 3.0,
59
+ "eval_f1_macro": 0.0,
60
+ "eval_f1_micro": 0.0,
61
+ "eval_loss": 0.07998213171958923,
62
+ "eval_precision": 0.0,
63
+ "eval_recall": 0.0,
64
+ "eval_runtime": 1.6587,
65
+ "eval_samples_per_second": 373.195,
66
+ "eval_steps_per_second": 15.675,
67
+ "step": 156
68
+ },
69
+ {
70
+ "epoch": 3.8461538461538463,
71
+ "grad_norm": 6965.6220703125,
72
+ "learning_rate": 4.9029411764705883e-05,
73
+ "loss": 0.0791,
74
+ "step": 200
75
+ },
76
+ {
77
+ "epoch": 4.0,
78
+ "eval_f1_macro": 0.0,
79
+ "eval_f1_micro": 0.0,
80
+ "eval_loss": 0.07983831316232681,
81
+ "eval_precision": 0.0,
82
+ "eval_recall": 0.0,
83
+ "eval_runtime": 1.6476,
84
+ "eval_samples_per_second": 375.688,
85
+ "eval_steps_per_second": 15.78,
86
+ "step": 208
87
+ },
88
+ {
89
+ "epoch": 4.8076923076923075,
90
+ "grad_norm": 4831.40478515625,
91
+ "learning_rate": 4.8539215686274515e-05,
92
+ "loss": 0.0777,
93
+ "step": 250
94
+ },
95
+ {
96
+ "epoch": 5.0,
97
+ "eval_f1_macro": 0.0,
98
+ "eval_f1_micro": 0.0,
99
+ "eval_loss": 0.07502060383558273,
100
+ "eval_precision": 0.0,
101
+ "eval_recall": 0.0,
102
+ "eval_runtime": 1.6796,
103
+ "eval_samples_per_second": 368.535,
104
+ "eval_steps_per_second": 15.48,
105
+ "step": 260
106
+ },
107
+ {
108
+ "epoch": 5.769230769230769,
109
+ "grad_norm": 7363.94580078125,
110
+ "learning_rate": 4.804901960784314e-05,
111
+ "loss": 0.0716,
112
+ "step": 300
113
+ },
114
+ {
115
+ "epoch": 6.0,
116
+ "eval_f1_macro": 0.038923099933562195,
117
+ "eval_f1_micro": 0.47416413373860183,
118
+ "eval_loss": 0.0661635547876358,
119
+ "eval_precision": 0.9069767441860465,
120
+ "eval_recall": 0.32098765432098764,
121
+ "eval_runtime": 1.6554,
122
+ "eval_samples_per_second": 373.937,
123
+ "eval_steps_per_second": 15.707,
124
+ "step": 312
125
+ },
126
+ {
127
+ "epoch": 6.730769230769231,
128
+ "grad_norm": 6673.61376953125,
129
+ "learning_rate": 4.7558823529411766e-05,
130
+ "loss": 0.0612,
131
+ "step": 350
132
+ },
133
+ {
134
+ "epoch": 7.0,
135
+ "eval_f1_macro": 0.04364161045234521,
136
+ "eval_f1_micro": 0.502835538752363,
137
+ "eval_loss": 0.061204444617033005,
138
+ "eval_precision": 0.8085106382978723,
139
+ "eval_recall": 0.36488340192043894,
140
+ "eval_runtime": 1.6748,
141
+ "eval_samples_per_second": 369.593,
142
+ "eval_steps_per_second": 15.524,
143
+ "step": 364
144
+ },
145
+ {
146
+ "epoch": 7.6923076923076925,
147
+ "grad_norm": 6426.9541015625,
148
+ "learning_rate": 4.70686274509804e-05,
149
+ "loss": 0.0526,
150
+ "step": 400
151
+ },
152
+ {
153
+ "epoch": 8.0,
154
+ "eval_f1_macro": 0.07288718124374782,
155
+ "eval_f1_micro": 0.5546522131887985,
156
+ "eval_loss": 0.05695081874728203,
157
+ "eval_precision": 0.8121693121693122,
158
+ "eval_recall": 0.42112482853223593,
159
+ "eval_runtime": 1.665,
160
+ "eval_samples_per_second": 371.782,
161
+ "eval_steps_per_second": 15.616,
162
+ "step": 416
163
+ },
164
+ {
165
+ "epoch": 8.653846153846153,
166
+ "grad_norm": 9758.2529296875,
167
+ "learning_rate": 4.6578431372549016e-05,
168
+ "loss": 0.0433,
169
+ "step": 450
170
+ },
171
+ {
172
+ "epoch": 9.0,
173
+ "eval_f1_macro": 0.07471540750966602,
174
+ "eval_f1_micro": 0.545950864422202,
175
+ "eval_loss": 0.05365221947431564,
176
+ "eval_precision": 0.8108108108108109,
177
+ "eval_recall": 0.411522633744856,
178
+ "eval_runtime": 1.6749,
179
+ "eval_samples_per_second": 369.564,
180
+ "eval_steps_per_second": 15.523,
181
+ "step": 468
182
+ },
183
+ {
184
+ "epoch": 9.615384615384615,
185
+ "grad_norm": 3793.51953125,
186
+ "learning_rate": 4.608823529411765e-05,
187
+ "loss": 0.0397,
188
+ "step": 500
189
+ },
190
+ {
191
+ "epoch": 10.0,
192
+ "eval_f1_macro": 0.12734138202225653,
193
+ "eval_f1_micro": 0.5939086294416244,
194
+ "eval_loss": 0.052679501473903656,
195
+ "eval_precision": 0.7748344370860927,
196
+ "eval_recall": 0.48148148148148145,
197
+ "eval_runtime": 1.6561,
198
+ "eval_samples_per_second": 373.762,
199
+ "eval_steps_per_second": 15.699,
200
+ "step": 520
201
+ },
202
+ {
203
+ "epoch": 10.576923076923077,
204
+ "grad_norm": 4446.76611328125,
205
+ "learning_rate": 4.559803921568628e-05,
206
+ "loss": 0.0329,
207
+ "step": 550
208
+ },
209
+ {
210
+ "epoch": 11.0,
211
+ "eval_f1_macro": 0.1493812021209619,
212
+ "eval_f1_micro": 0.609735269000854,
213
+ "eval_loss": 0.05011816695332527,
214
+ "eval_precision": 0.8076923076923077,
215
+ "eval_recall": 0.4897119341563786,
216
+ "eval_runtime": 1.6806,
217
+ "eval_samples_per_second": 368.324,
218
+ "eval_steps_per_second": 15.471,
219
+ "step": 572
220
+ },
221
+ {
222
+ "epoch": 11.538461538461538,
223
+ "grad_norm": 5606.7880859375,
224
+ "learning_rate": 4.51078431372549e-05,
225
+ "loss": 0.0286,
226
+ "step": 600
227
+ },
228
+ {
229
+ "epoch": 12.0,
230
+ "eval_f1_macro": 0.19655294907558352,
231
+ "eval_f1_micro": 0.6096959737058341,
232
+ "eval_loss": 0.05061562359333038,
233
+ "eval_precision": 0.7602459016393442,
234
+ "eval_recall": 0.5089163237311386,
235
+ "eval_runtime": 1.6713,
236
+ "eval_samples_per_second": 370.373,
237
+ "eval_steps_per_second": 15.557,
238
+ "step": 624
239
+ },
240
+ {
241
+ "epoch": 12.5,
242
+ "grad_norm": 5415.85546875,
243
+ "learning_rate": 4.461764705882353e-05,
244
+ "loss": 0.0254,
245
+ "step": 650
246
+ },
247
+ {
248
+ "epoch": 13.0,
249
+ "eval_f1_macro": 0.20270281237441773,
250
+ "eval_f1_micro": 0.6230831315577078,
251
+ "eval_loss": 0.04895725101232529,
252
+ "eval_precision": 0.7568627450980392,
253
+ "eval_recall": 0.5294924554183813,
254
+ "eval_runtime": 2.2359,
255
+ "eval_samples_per_second": 276.851,
256
+ "eval_steps_per_second": 11.629,
257
+ "step": 676
258
+ },
259
+ {
260
+ "epoch": 13.461538461538462,
261
+ "grad_norm": 6214.2744140625,
262
+ "learning_rate": 4.412745098039216e-05,
263
+ "loss": 0.023,
264
+ "step": 700
265
+ },
266
+ {
267
+ "epoch": 14.0,
268
+ "eval_f1_macro": 0.21593787964288247,
269
+ "eval_f1_micro": 0.6309904153354633,
270
+ "eval_loss": 0.047714490443468094,
271
+ "eval_precision": 0.7552581261950286,
272
+ "eval_recall": 0.541838134430727,
273
+ "eval_runtime": 1.7054,
274
+ "eval_samples_per_second": 362.956,
275
+ "eval_steps_per_second": 15.245,
276
+ "step": 728
277
+ },
278
+ {
279
+ "epoch": 14.423076923076923,
280
+ "grad_norm": 5322.5205078125,
281
+ "learning_rate": 4.363725490196079e-05,
282
+ "loss": 0.0202,
283
+ "step": 750
284
+ },
285
+ {
286
+ "epoch": 15.0,
287
+ "eval_f1_macro": 0.2127885503653234,
288
+ "eval_f1_micro": 0.6297739672642245,
289
+ "eval_loss": 0.04875025525689125,
290
+ "eval_precision": 0.7292418772563177,
291
+ "eval_recall": 0.5541838134430727,
292
+ "eval_runtime": 1.6728,
293
+ "eval_samples_per_second": 370.029,
294
+ "eval_steps_per_second": 15.542,
295
+ "step": 780
296
+ },
297
+ {
298
+ "epoch": 15.384615384615385,
299
+ "grad_norm": 3209.152587890625,
300
+ "learning_rate": 4.3147058823529413e-05,
301
+ "loss": 0.0186,
302
+ "step": 800
303
+ },
304
+ {
305
+ "epoch": 16.0,
306
+ "eval_f1_macro": 0.2234013473846607,
307
+ "eval_f1_micro": 0.6476484194294526,
308
+ "eval_loss": 0.04788675159215927,
309
+ "eval_precision": 0.7394366197183099,
310
+ "eval_recall": 0.5761316872427984,
311
+ "eval_runtime": 1.6703,
312
+ "eval_samples_per_second": 370.591,
313
+ "eval_steps_per_second": 15.566,
314
+ "step": 832
315
+ },
316
+ {
317
+ "epoch": 16.346153846153847,
318
+ "grad_norm": 6189.79296875,
319
+ "learning_rate": 4.265686274509804e-05,
320
+ "loss": 0.0166,
321
+ "step": 850
322
+ },
323
+ {
324
+ "epoch": 17.0,
325
+ "eval_f1_macro": 0.2461245877407925,
326
+ "eval_f1_micro": 0.6392067124332571,
327
+ "eval_loss": 0.047994960099458694,
328
+ "eval_precision": 0.7199312714776632,
329
+ "eval_recall": 0.5747599451303155,
330
+ "eval_runtime": 1.6784,
331
+ "eval_samples_per_second": 368.8,
332
+ "eval_steps_per_second": 15.491,
333
+ "step": 884
334
+ },
335
+ {
336
+ "epoch": 17.307692307692307,
337
+ "grad_norm": 2584.114013671875,
338
+ "learning_rate": 4.216666666666667e-05,
339
+ "loss": 0.0148,
340
+ "step": 900
341
+ },
342
+ {
343
+ "epoch": 18.0,
344
+ "eval_f1_macro": 0.2672202679954545,
345
+ "eval_f1_micro": 0.6533742331288344,
346
+ "eval_loss": 0.04750063270330429,
347
+ "eval_precision": 0.7408695652173913,
348
+ "eval_recall": 0.5843621399176955,
349
+ "eval_runtime": 1.6744,
350
+ "eval_samples_per_second": 369.683,
351
+ "eval_steps_per_second": 15.528,
352
+ "step": 936
353
+ },
354
+ {
355
+ "epoch": 18.26923076923077,
356
+ "grad_norm": 2617.191650390625,
357
+ "learning_rate": 4.1676470588235296e-05,
358
+ "loss": 0.0128,
359
+ "step": 950
360
+ },
361
+ {
362
+ "epoch": 19.0,
363
+ "eval_f1_macro": 0.25478648592377506,
364
+ "eval_f1_micro": 0.6585365853658537,
365
+ "eval_loss": 0.04794125631451607,
366
+ "eval_precision": 0.7409948542024014,
367
+ "eval_recall": 0.5925925925925926,
368
+ "eval_runtime": 1.6751,
369
+ "eval_samples_per_second": 369.525,
370
+ "eval_steps_per_second": 15.521,
371
+ "step": 988
372
+ },
373
+ {
374
+ "epoch": 19.23076923076923,
375
+ "grad_norm": 2930.471923828125,
376
+ "learning_rate": 4.118627450980392e-05,
377
+ "loss": 0.0116,
378
+ "step": 1000
379
+ },
380
+ {
381
+ "epoch": 20.0,
382
+ "eval_f1_macro": 0.2563341548388003,
383
+ "eval_f1_micro": 0.6470143613000756,
384
+ "eval_loss": 0.04850601404905319,
385
+ "eval_precision": 0.7205387205387206,
386
+ "eval_recall": 0.5871056241426612,
387
+ "eval_runtime": 1.6688,
388
+ "eval_samples_per_second": 370.936,
389
+ "eval_steps_per_second": 15.58,
390
+ "step": 1040
391
+ },
392
+ {
393
+ "epoch": 20.192307692307693,
394
+ "grad_norm": 2841.39111328125,
395
+ "learning_rate": 4.069607843137255e-05,
396
+ "loss": 0.0106,
397
+ "step": 1050
398
+ },
399
+ {
400
+ "epoch": 21.0,
401
+ "eval_f1_macro": 0.2594183553248198,
402
+ "eval_f1_micro": 0.6483600305110603,
403
+ "eval_loss": 0.04854930564761162,
404
+ "eval_precision": 0.7302405498281787,
405
+ "eval_recall": 0.5829903978052127,
406
+ "eval_runtime": 1.6818,
407
+ "eval_samples_per_second": 368.068,
408
+ "eval_steps_per_second": 15.46,
409
+ "step": 1092
410
+ }
411
+ ],
412
+ "logging_steps": 50,
413
+ "max_steps": 5200,
414
+ "num_input_tokens_seen": 0,
415
+ "num_train_epochs": 100,
416
+ "save_steps": 500,
417
+ "stateful_callbacks": {
418
+ "EarlyStoppingCallback": {
419
+ "args": {
420
+ "early_stopping_patience": 5,
421
+ "early_stopping_threshold": 0.0
422
+ },
423
+ "attributes": {
424
+ "early_stopping_patience_counter": 2
425
+ }
426
+ },
427
+ "TrainerControl": {
428
+ "args": {
429
+ "should_epoch_stop": false,
430
+ "should_evaluate": false,
431
+ "should_log": false,
432
+ "should_save": true,
433
+ "should_training_stop": false
434
+ },
435
+ "attributes": {}
436
+ }
437
+ },
438
+ "total_flos": 6866097847547904.0,
439
+ "train_batch_size": 24,
440
+ "trial_name": null,
441
+ "trial_params": null
442
+ }
checkpoint-1092/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f05e8606862008bfc17115034db9429cc42bab3677cf65b2b782cae0ed9dfed
3
+ size 5368
checkpoint-1092/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1144/config.json ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "T1005",
13
+ "1": "T1021",
14
+ "2": "T1027",
15
+ "3": "T1033",
16
+ "4": "T1036",
17
+ "5": "T1041",
18
+ "6": "T1046",
19
+ "7": "T1048",
20
+ "8": "T1049",
21
+ "9": "T1053",
22
+ "10": "T1055",
23
+ "11": "T1056",
24
+ "12": "T1057",
25
+ "13": "T1059",
26
+ "14": "T1070",
27
+ "15": "T1071",
28
+ "16": "T1074",
29
+ "17": "T1078",
30
+ "18": "T1082",
31
+ "19": "T1083",
32
+ "20": "T1098",
33
+ "21": "T1102",
34
+ "22": "T1105",
35
+ "23": "T1110",
36
+ "24": "T1113",
37
+ "25": "T1114",
38
+ "26": "T1115",
39
+ "27": "T1132",
40
+ "28": "T1137",
41
+ "29": "T1140",
42
+ "30": "T1189",
43
+ "31": "T1190",
44
+ "32": "T1195",
45
+ "33": "T1203",
46
+ "34": "T1204",
47
+ "35": "T1218",
48
+ "36": "T1486",
49
+ "37": "T1491",
50
+ "38": "T1496",
51
+ "39": "T1497",
52
+ "40": "T1499",
53
+ "41": "T1528",
54
+ "42": "T1539",
55
+ "43": "T1547",
56
+ "44": "T1555",
57
+ "45": "T1557",
58
+ "46": "T1562",
59
+ "47": "T1564",
60
+ "48": "T1566",
61
+ "49": "T1567",
62
+ "50": "T1573",
63
+ "51": "T1574",
64
+ "52": "T1583",
65
+ "53": "T1586",
66
+ "54": "T1589",
67
+ "55": "T1606",
68
+ "56": "T1608",
69
+ "57": "T1614",
70
+ "58": "T1620",
71
+ "59": "T1623.001",
72
+ "60": "T1631.001"
73
+ },
74
+ "initializer_range": 0.02,
75
+ "intermediate_size": 3072,
76
+ "label2id": {
77
+ "T1005": 0,
78
+ "T1021": 1,
79
+ "T1027": 2,
80
+ "T1033": 3,
81
+ "T1036": 4,
82
+ "T1041": 5,
83
+ "T1046": 6,
84
+ "T1048": 7,
85
+ "T1049": 8,
86
+ "T1053": 9,
87
+ "T1055": 10,
88
+ "T1056": 11,
89
+ "T1057": 12,
90
+ "T1059": 13,
91
+ "T1070": 14,
92
+ "T1071": 15,
93
+ "T1074": 16,
94
+ "T1078": 17,
95
+ "T1082": 18,
96
+ "T1083": 19,
97
+ "T1098": 20,
98
+ "T1102": 21,
99
+ "T1105": 22,
100
+ "T1110": 23,
101
+ "T1113": 24,
102
+ "T1114": 25,
103
+ "T1115": 26,
104
+ "T1132": 27,
105
+ "T1137": 28,
106
+ "T1140": 29,
107
+ "T1189": 30,
108
+ "T1190": 31,
109
+ "T1195": 32,
110
+ "T1203": 33,
111
+ "T1204": 34,
112
+ "T1218": 35,
113
+ "T1486": 36,
114
+ "T1491": 37,
115
+ "T1496": 38,
116
+ "T1497": 39,
117
+ "T1499": 40,
118
+ "T1528": 41,
119
+ "T1539": 42,
120
+ "T1547": 43,
121
+ "T1555": 44,
122
+ "T1557": 45,
123
+ "T1562": 46,
124
+ "T1564": 47,
125
+ "T1566": 48,
126
+ "T1567": 49,
127
+ "T1573": 50,
128
+ "T1574": 51,
129
+ "T1583": 52,
130
+ "T1586": 53,
131
+ "T1589": 54,
132
+ "T1606": 55,
133
+ "T1608": 56,
134
+ "T1614": 57,
135
+ "T1620": 58,
136
+ "T1623.001": 59,
137
+ "T1631.001": 60
138
+ },
139
+ "layer_norm_eps": 1e-12,
140
+ "max_position_embeddings": 512,
141
+ "model_type": "bert",
142
+ "num_attention_heads": 12,
143
+ "num_hidden_layers": 12,
144
+ "pad_token_id": 0,
145
+ "position_embedding_type": "absolute",
146
+ "problem_type": "multi_label_classification",
147
+ "torch_dtype": "float32",
148
+ "transformers_version": "4.55.2",
149
+ "type_vocab_size": 2,
150
+ "use_cache": true,
151
+ "vocab_size": 30522
152
+ }
checkpoint-1144/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53292d8292e5eb8a125e883da4859b67e125c2bee626e465817107b0b47253db
3
+ size 438140132
checkpoint-1144/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b2648001073013c1199c297f7ad1e2eae82e586f6a7e90e534e4623fff849df
3
+ size 876401210
checkpoint-1144/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d08838643a7c90e6466d579ae7c80616e23efb3384b0f77a49923144d1295434
3
+ size 14244
checkpoint-1144/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
3
+ size 988
checkpoint-1144/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fbc4ce20d003fb79e97683490de717af355afe30a4bbaf6a848254efd86ee97
3
+ size 1064
checkpoint-1144/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-1144/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1144/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-1144/trainer_state.json ADDED
@@ -0,0 +1,461 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 988,
3
+ "best_metric": 0.6585365853658537,
4
+ "best_model_checkpoint": "./cysecbert-ttp-annoctr_step2/checkpoint-988",
5
+ "epoch": 22.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1144,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.9615384615384616,
14
+ "grad_norm": 35970.37109375,
15
+ "learning_rate": 2.45e-05,
16
+ "loss": 0.5612,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_f1_macro": 0.0,
22
+ "eval_f1_micro": 0.0,
23
+ "eval_loss": 0.33502161502838135,
24
+ "eval_precision": 0.0,
25
+ "eval_recall": 0.0,
26
+ "eval_runtime": 1.6495,
27
+ "eval_samples_per_second": 375.276,
28
+ "eval_steps_per_second": 15.763,
29
+ "step": 52
30
+ },
31
+ {
32
+ "epoch": 1.9230769230769231,
33
+ "grad_norm": 10958.1689453125,
34
+ "learning_rate": 4.9500000000000004e-05,
35
+ "loss": 0.214,
36
+ "step": 100
37
+ },
38
+ {
39
+ "epoch": 2.0,
40
+ "eval_f1_macro": 0.0,
41
+ "eval_f1_micro": 0.0,
42
+ "eval_loss": 0.09798076748847961,
43
+ "eval_precision": 0.0,
44
+ "eval_recall": 0.0,
45
+ "eval_runtime": 1.6513,
46
+ "eval_samples_per_second": 374.861,
47
+ "eval_steps_per_second": 15.745,
48
+ "step": 104
49
+ },
50
+ {
51
+ "epoch": 2.8846153846153846,
52
+ "grad_norm": 5634.60546875,
53
+ "learning_rate": 4.951960784313726e-05,
54
+ "loss": 0.0873,
55
+ "step": 150
56
+ },
57
+ {
58
+ "epoch": 3.0,
59
+ "eval_f1_macro": 0.0,
60
+ "eval_f1_micro": 0.0,
61
+ "eval_loss": 0.07998213171958923,
62
+ "eval_precision": 0.0,
63
+ "eval_recall": 0.0,
64
+ "eval_runtime": 1.6587,
65
+ "eval_samples_per_second": 373.195,
66
+ "eval_steps_per_second": 15.675,
67
+ "step": 156
68
+ },
69
+ {
70
+ "epoch": 3.8461538461538463,
71
+ "grad_norm": 6965.6220703125,
72
+ "learning_rate": 4.9029411764705883e-05,
73
+ "loss": 0.0791,
74
+ "step": 200
75
+ },
76
+ {
77
+ "epoch": 4.0,
78
+ "eval_f1_macro": 0.0,
79
+ "eval_f1_micro": 0.0,
80
+ "eval_loss": 0.07983831316232681,
81
+ "eval_precision": 0.0,
82
+ "eval_recall": 0.0,
83
+ "eval_runtime": 1.6476,
84
+ "eval_samples_per_second": 375.688,
85
+ "eval_steps_per_second": 15.78,
86
+ "step": 208
87
+ },
88
+ {
89
+ "epoch": 4.8076923076923075,
90
+ "grad_norm": 4831.40478515625,
91
+ "learning_rate": 4.8539215686274515e-05,
92
+ "loss": 0.0777,
93
+ "step": 250
94
+ },
95
+ {
96
+ "epoch": 5.0,
97
+ "eval_f1_macro": 0.0,
98
+ "eval_f1_micro": 0.0,
99
+ "eval_loss": 0.07502060383558273,
100
+ "eval_precision": 0.0,
101
+ "eval_recall": 0.0,
102
+ "eval_runtime": 1.6796,
103
+ "eval_samples_per_second": 368.535,
104
+ "eval_steps_per_second": 15.48,
105
+ "step": 260
106
+ },
107
+ {
108
+ "epoch": 5.769230769230769,
109
+ "grad_norm": 7363.94580078125,
110
+ "learning_rate": 4.804901960784314e-05,
111
+ "loss": 0.0716,
112
+ "step": 300
113
+ },
114
+ {
115
+ "epoch": 6.0,
116
+ "eval_f1_macro": 0.038923099933562195,
117
+ "eval_f1_micro": 0.47416413373860183,
118
+ "eval_loss": 0.0661635547876358,
119
+ "eval_precision": 0.9069767441860465,
120
+ "eval_recall": 0.32098765432098764,
121
+ "eval_runtime": 1.6554,
122
+ "eval_samples_per_second": 373.937,
123
+ "eval_steps_per_second": 15.707,
124
+ "step": 312
125
+ },
126
+ {
127
+ "epoch": 6.730769230769231,
128
+ "grad_norm": 6673.61376953125,
129
+ "learning_rate": 4.7558823529411766e-05,
130
+ "loss": 0.0612,
131
+ "step": 350
132
+ },
133
+ {
134
+ "epoch": 7.0,
135
+ "eval_f1_macro": 0.04364161045234521,
136
+ "eval_f1_micro": 0.502835538752363,
137
+ "eval_loss": 0.061204444617033005,
138
+ "eval_precision": 0.8085106382978723,
139
+ "eval_recall": 0.36488340192043894,
140
+ "eval_runtime": 1.6748,
141
+ "eval_samples_per_second": 369.593,
142
+ "eval_steps_per_second": 15.524,
143
+ "step": 364
144
+ },
145
+ {
146
+ "epoch": 7.6923076923076925,
147
+ "grad_norm": 6426.9541015625,
148
+ "learning_rate": 4.70686274509804e-05,
149
+ "loss": 0.0526,
150
+ "step": 400
151
+ },
152
+ {
153
+ "epoch": 8.0,
154
+ "eval_f1_macro": 0.07288718124374782,
155
+ "eval_f1_micro": 0.5546522131887985,
156
+ "eval_loss": 0.05695081874728203,
157
+ "eval_precision": 0.8121693121693122,
158
+ "eval_recall": 0.42112482853223593,
159
+ "eval_runtime": 1.665,
160
+ "eval_samples_per_second": 371.782,
161
+ "eval_steps_per_second": 15.616,
162
+ "step": 416
163
+ },
164
+ {
165
+ "epoch": 8.653846153846153,
166
+ "grad_norm": 9758.2529296875,
167
+ "learning_rate": 4.6578431372549016e-05,
168
+ "loss": 0.0433,
169
+ "step": 450
170
+ },
171
+ {
172
+ "epoch": 9.0,
173
+ "eval_f1_macro": 0.07471540750966602,
174
+ "eval_f1_micro": 0.545950864422202,
175
+ "eval_loss": 0.05365221947431564,
176
+ "eval_precision": 0.8108108108108109,
177
+ "eval_recall": 0.411522633744856,
178
+ "eval_runtime": 1.6749,
179
+ "eval_samples_per_second": 369.564,
180
+ "eval_steps_per_second": 15.523,
181
+ "step": 468
182
+ },
183
+ {
184
+ "epoch": 9.615384615384615,
185
+ "grad_norm": 3793.51953125,
186
+ "learning_rate": 4.608823529411765e-05,
187
+ "loss": 0.0397,
188
+ "step": 500
189
+ },
190
+ {
191
+ "epoch": 10.0,
192
+ "eval_f1_macro": 0.12734138202225653,
193
+ "eval_f1_micro": 0.5939086294416244,
194
+ "eval_loss": 0.052679501473903656,
195
+ "eval_precision": 0.7748344370860927,
196
+ "eval_recall": 0.48148148148148145,
197
+ "eval_runtime": 1.6561,
198
+ "eval_samples_per_second": 373.762,
199
+ "eval_steps_per_second": 15.699,
200
+ "step": 520
201
+ },
202
+ {
203
+ "epoch": 10.576923076923077,
204
+ "grad_norm": 4446.76611328125,
205
+ "learning_rate": 4.559803921568628e-05,
206
+ "loss": 0.0329,
207
+ "step": 550
208
+ },
209
+ {
210
+ "epoch": 11.0,
211
+ "eval_f1_macro": 0.1493812021209619,
212
+ "eval_f1_micro": 0.609735269000854,
213
+ "eval_loss": 0.05011816695332527,
214
+ "eval_precision": 0.8076923076923077,
215
+ "eval_recall": 0.4897119341563786,
216
+ "eval_runtime": 1.6806,
217
+ "eval_samples_per_second": 368.324,
218
+ "eval_steps_per_second": 15.471,
219
+ "step": 572
220
+ },
221
+ {
222
+ "epoch": 11.538461538461538,
223
+ "grad_norm": 5606.7880859375,
224
+ "learning_rate": 4.51078431372549e-05,
225
+ "loss": 0.0286,
226
+ "step": 600
227
+ },
228
+ {
229
+ "epoch": 12.0,
230
+ "eval_f1_macro": 0.19655294907558352,
231
+ "eval_f1_micro": 0.6096959737058341,
232
+ "eval_loss": 0.05061562359333038,
233
+ "eval_precision": 0.7602459016393442,
234
+ "eval_recall": 0.5089163237311386,
235
+ "eval_runtime": 1.6713,
236
+ "eval_samples_per_second": 370.373,
237
+ "eval_steps_per_second": 15.557,
238
+ "step": 624
239
+ },
240
+ {
241
+ "epoch": 12.5,
242
+ "grad_norm": 5415.85546875,
243
+ "learning_rate": 4.461764705882353e-05,
244
+ "loss": 0.0254,
245
+ "step": 650
246
+ },
247
+ {
248
+ "epoch": 13.0,
249
+ "eval_f1_macro": 0.20270281237441773,
250
+ "eval_f1_micro": 0.6230831315577078,
251
+ "eval_loss": 0.04895725101232529,
252
+ "eval_precision": 0.7568627450980392,
253
+ "eval_recall": 0.5294924554183813,
254
+ "eval_runtime": 2.2359,
255
+ "eval_samples_per_second": 276.851,
256
+ "eval_steps_per_second": 11.629,
257
+ "step": 676
258
+ },
259
+ {
260
+ "epoch": 13.461538461538462,
261
+ "grad_norm": 6214.2744140625,
262
+ "learning_rate": 4.412745098039216e-05,
263
+ "loss": 0.023,
264
+ "step": 700
265
+ },
266
+ {
267
+ "epoch": 14.0,
268
+ "eval_f1_macro": 0.21593787964288247,
269
+ "eval_f1_micro": 0.6309904153354633,
270
+ "eval_loss": 0.047714490443468094,
271
+ "eval_precision": 0.7552581261950286,
272
+ "eval_recall": 0.541838134430727,
273
+ "eval_runtime": 1.7054,
274
+ "eval_samples_per_second": 362.956,
275
+ "eval_steps_per_second": 15.245,
276
+ "step": 728
277
+ },
278
+ {
279
+ "epoch": 14.423076923076923,
280
+ "grad_norm": 5322.5205078125,
281
+ "learning_rate": 4.363725490196079e-05,
282
+ "loss": 0.0202,
283
+ "step": 750
284
+ },
285
+ {
286
+ "epoch": 15.0,
287
+ "eval_f1_macro": 0.2127885503653234,
288
+ "eval_f1_micro": 0.6297739672642245,
289
+ "eval_loss": 0.04875025525689125,
290
+ "eval_precision": 0.7292418772563177,
291
+ "eval_recall": 0.5541838134430727,
292
+ "eval_runtime": 1.6728,
293
+ "eval_samples_per_second": 370.029,
294
+ "eval_steps_per_second": 15.542,
295
+ "step": 780
296
+ },
297
+ {
298
+ "epoch": 15.384615384615385,
299
+ "grad_norm": 3209.152587890625,
300
+ "learning_rate": 4.3147058823529413e-05,
301
+ "loss": 0.0186,
302
+ "step": 800
303
+ },
304
+ {
305
+ "epoch": 16.0,
306
+ "eval_f1_macro": 0.2234013473846607,
307
+ "eval_f1_micro": 0.6476484194294526,
308
+ "eval_loss": 0.04788675159215927,
309
+ "eval_precision": 0.7394366197183099,
310
+ "eval_recall": 0.5761316872427984,
311
+ "eval_runtime": 1.6703,
312
+ "eval_samples_per_second": 370.591,
313
+ "eval_steps_per_second": 15.566,
314
+ "step": 832
315
+ },
316
+ {
317
+ "epoch": 16.346153846153847,
318
+ "grad_norm": 6189.79296875,
319
+ "learning_rate": 4.265686274509804e-05,
320
+ "loss": 0.0166,
321
+ "step": 850
322
+ },
323
+ {
324
+ "epoch": 17.0,
325
+ "eval_f1_macro": 0.2461245877407925,
326
+ "eval_f1_micro": 0.6392067124332571,
327
+ "eval_loss": 0.047994960099458694,
328
+ "eval_precision": 0.7199312714776632,
329
+ "eval_recall": 0.5747599451303155,
330
+ "eval_runtime": 1.6784,
331
+ "eval_samples_per_second": 368.8,
332
+ "eval_steps_per_second": 15.491,
333
+ "step": 884
334
+ },
335
+ {
336
+ "epoch": 17.307692307692307,
337
+ "grad_norm": 2584.114013671875,
338
+ "learning_rate": 4.216666666666667e-05,
339
+ "loss": 0.0148,
340
+ "step": 900
341
+ },
342
+ {
343
+ "epoch": 18.0,
344
+ "eval_f1_macro": 0.2672202679954545,
345
+ "eval_f1_micro": 0.6533742331288344,
346
+ "eval_loss": 0.04750063270330429,
347
+ "eval_precision": 0.7408695652173913,
348
+ "eval_recall": 0.5843621399176955,
349
+ "eval_runtime": 1.6744,
350
+ "eval_samples_per_second": 369.683,
351
+ "eval_steps_per_second": 15.528,
352
+ "step": 936
353
+ },
354
+ {
355
+ "epoch": 18.26923076923077,
356
+ "grad_norm": 2617.191650390625,
357
+ "learning_rate": 4.1676470588235296e-05,
358
+ "loss": 0.0128,
359
+ "step": 950
360
+ },
361
+ {
362
+ "epoch": 19.0,
363
+ "eval_f1_macro": 0.25478648592377506,
364
+ "eval_f1_micro": 0.6585365853658537,
365
+ "eval_loss": 0.04794125631451607,
366
+ "eval_precision": 0.7409948542024014,
367
+ "eval_recall": 0.5925925925925926,
368
+ "eval_runtime": 1.6751,
369
+ "eval_samples_per_second": 369.525,
370
+ "eval_steps_per_second": 15.521,
371
+ "step": 988
372
+ },
373
+ {
374
+ "epoch": 19.23076923076923,
375
+ "grad_norm": 2930.471923828125,
376
+ "learning_rate": 4.118627450980392e-05,
377
+ "loss": 0.0116,
378
+ "step": 1000
379
+ },
380
+ {
381
+ "epoch": 20.0,
382
+ "eval_f1_macro": 0.2563341548388003,
383
+ "eval_f1_micro": 0.6470143613000756,
384
+ "eval_loss": 0.04850601404905319,
385
+ "eval_precision": 0.7205387205387206,
386
+ "eval_recall": 0.5871056241426612,
387
+ "eval_runtime": 1.6688,
388
+ "eval_samples_per_second": 370.936,
389
+ "eval_steps_per_second": 15.58,
390
+ "step": 1040
391
+ },
392
+ {
393
+ "epoch": 20.192307692307693,
394
+ "grad_norm": 2841.39111328125,
395
+ "learning_rate": 4.069607843137255e-05,
396
+ "loss": 0.0106,
397
+ "step": 1050
398
+ },
399
+ {
400
+ "epoch": 21.0,
401
+ "eval_f1_macro": 0.2594183553248198,
402
+ "eval_f1_micro": 0.6483600305110603,
403
+ "eval_loss": 0.04854930564761162,
404
+ "eval_precision": 0.7302405498281787,
405
+ "eval_recall": 0.5829903978052127,
406
+ "eval_runtime": 1.6818,
407
+ "eval_samples_per_second": 368.068,
408
+ "eval_steps_per_second": 15.46,
409
+ "step": 1092
410
+ },
411
+ {
412
+ "epoch": 21.153846153846153,
413
+ "grad_norm": 2682.0166015625,
414
+ "learning_rate": 4.020588235294118e-05,
415
+ "loss": 0.0096,
416
+ "step": 1100
417
+ },
418
+ {
419
+ "epoch": 22.0,
420
+ "eval_f1_macro": 0.2448587100305824,
421
+ "eval_f1_micro": 0.6454478164322723,
422
+ "eval_loss": 0.048451464623212814,
423
+ "eval_precision": 0.7009646302250804,
424
+ "eval_recall": 0.598079561042524,
425
+ "eval_runtime": 1.6836,
426
+ "eval_samples_per_second": 367.655,
427
+ "eval_steps_per_second": 15.443,
428
+ "step": 1144
429
+ }
430
+ ],
431
+ "logging_steps": 50,
432
+ "max_steps": 5200,
433
+ "num_input_tokens_seen": 0,
434
+ "num_train_epochs": 100,
435
+ "save_steps": 500,
436
+ "stateful_callbacks": {
437
+ "EarlyStoppingCallback": {
438
+ "args": {
439
+ "early_stopping_patience": 5,
440
+ "early_stopping_threshold": 0.0
441
+ },
442
+ "attributes": {
443
+ "early_stopping_patience_counter": 3
444
+ }
445
+ },
446
+ "TrainerControl": {
447
+ "args": {
448
+ "should_epoch_stop": false,
449
+ "should_evaluate": false,
450
+ "should_log": false,
451
+ "should_save": true,
452
+ "should_training_stop": false
453
+ },
454
+ "attributes": {}
455
+ }
456
+ },
457
+ "total_flos": 7193054887907328.0,
458
+ "train_batch_size": 24,
459
+ "trial_name": null,
460
+ "trial_params": null
461
+ }
checkpoint-1144/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f05e8606862008bfc17115034db9429cc42bab3677cf65b2b782cae0ed9dfed
3
+ size 5368
checkpoint-1144/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1196/config.json ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "T1005",
13
+ "1": "T1021",
14
+ "2": "T1027",
15
+ "3": "T1033",
16
+ "4": "T1036",
17
+ "5": "T1041",
18
+ "6": "T1046",
19
+ "7": "T1048",
20
+ "8": "T1049",
21
+ "9": "T1053",
22
+ "10": "T1055",
23
+ "11": "T1056",
24
+ "12": "T1057",
25
+ "13": "T1059",
26
+ "14": "T1070",
27
+ "15": "T1071",
28
+ "16": "T1074",
29
+ "17": "T1078",
30
+ "18": "T1082",
31
+ "19": "T1083",
32
+ "20": "T1098",
33
+ "21": "T1102",
34
+ "22": "T1105",
35
+ "23": "T1110",
36
+ "24": "T1113",
37
+ "25": "T1114",
38
+ "26": "T1115",
39
+ "27": "T1132",
40
+ "28": "T1137",
41
+ "29": "T1140",
42
+ "30": "T1189",
43
+ "31": "T1190",
44
+ "32": "T1195",
45
+ "33": "T1203",
46
+ "34": "T1204",
47
+ "35": "T1218",
48
+ "36": "T1486",
49
+ "37": "T1491",
50
+ "38": "T1496",
51
+ "39": "T1497",
52
+ "40": "T1499",
53
+ "41": "T1528",
54
+ "42": "T1539",
55
+ "43": "T1547",
56
+ "44": "T1555",
57
+ "45": "T1557",
58
+ "46": "T1562",
59
+ "47": "T1564",
60
+ "48": "T1566",
61
+ "49": "T1567",
62
+ "50": "T1573",
63
+ "51": "T1574",
64
+ "52": "T1583",
65
+ "53": "T1586",
66
+ "54": "T1589",
67
+ "55": "T1606",
68
+ "56": "T1608",
69
+ "57": "T1614",
70
+ "58": "T1620",
71
+ "59": "T1623.001",
72
+ "60": "T1631.001"
73
+ },
74
+ "initializer_range": 0.02,
75
+ "intermediate_size": 3072,
76
+ "label2id": {
77
+ "T1005": 0,
78
+ "T1021": 1,
79
+ "T1027": 2,
80
+ "T1033": 3,
81
+ "T1036": 4,
82
+ "T1041": 5,
83
+ "T1046": 6,
84
+ "T1048": 7,
85
+ "T1049": 8,
86
+ "T1053": 9,
87
+ "T1055": 10,
88
+ "T1056": 11,
89
+ "T1057": 12,
90
+ "T1059": 13,
91
+ "T1070": 14,
92
+ "T1071": 15,
93
+ "T1074": 16,
94
+ "T1078": 17,
95
+ "T1082": 18,
96
+ "T1083": 19,
97
+ "T1098": 20,
98
+ "T1102": 21,
99
+ "T1105": 22,
100
+ "T1110": 23,
101
+ "T1113": 24,
102
+ "T1114": 25,
103
+ "T1115": 26,
104
+ "T1132": 27,
105
+ "T1137": 28,
106
+ "T1140": 29,
107
+ "T1189": 30,
108
+ "T1190": 31,
109
+ "T1195": 32,
110
+ "T1203": 33,
111
+ "T1204": 34,
112
+ "T1218": 35,
113
+ "T1486": 36,
114
+ "T1491": 37,
115
+ "T1496": 38,
116
+ "T1497": 39,
117
+ "T1499": 40,
118
+ "T1528": 41,
119
+ "T1539": 42,
120
+ "T1547": 43,
121
+ "T1555": 44,
122
+ "T1557": 45,
123
+ "T1562": 46,
124
+ "T1564": 47,
125
+ "T1566": 48,
126
+ "T1567": 49,
127
+ "T1573": 50,
128
+ "T1574": 51,
129
+ "T1583": 52,
130
+ "T1586": 53,
131
+ "T1589": 54,
132
+ "T1606": 55,
133
+ "T1608": 56,
134
+ "T1614": 57,
135
+ "T1620": 58,
136
+ "T1623.001": 59,
137
+ "T1631.001": 60
138
+ },
139
+ "layer_norm_eps": 1e-12,
140
+ "max_position_embeddings": 512,
141
+ "model_type": "bert",
142
+ "num_attention_heads": 12,
143
+ "num_hidden_layers": 12,
144
+ "pad_token_id": 0,
145
+ "position_embedding_type": "absolute",
146
+ "problem_type": "multi_label_classification",
147
+ "torch_dtype": "float32",
148
+ "transformers_version": "4.55.2",
149
+ "type_vocab_size": 2,
150
+ "use_cache": true,
151
+ "vocab_size": 30522
152
+ }
checkpoint-1196/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:775d3d1a0bd8afff119f35e3fd6c8d5b7964891b3a9abbf7765ec715353f2f14
3
+ size 438140132