rm0013 committed on
Commit
7749968
·
verified ·
1 Parent(s): 8ad64f3

Upload folder using huggingface_hub

Browse files
checkpoint-1000/config.json ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForTokenClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "B-Activity",
13
+ "1": "B-Administration",
14
+ "2": "B-Age",
15
+ "3": "B-Area",
16
+ "4": "B-Biological_attribute",
17
+ "5": "B-Biological_structure",
18
+ "6": "B-Clinical_event",
19
+ "7": "B-Color",
20
+ "8": "B-Coreference",
21
+ "9": "B-Date",
22
+ "10": "B-Detailed_description",
23
+ "11": "B-Diagnostic_procedure",
24
+ "12": "B-Disease_disorder",
25
+ "13": "B-Distance",
26
+ "14": "B-Dosage",
27
+ "15": "B-Duration",
28
+ "16": "B-Family_history",
29
+ "17": "B-Frequency",
30
+ "18": "B-Height",
31
+ "19": "B-History",
32
+ "20": "B-Lab_value",
33
+ "21": "B-Mass",
34
+ "22": "B-Medication",
35
+ "23": "B-Nonbiological_location",
36
+ "24": "B-Occupation",
37
+ "25": "B-Other_entity",
38
+ "26": "B-Other_event",
39
+ "27": "B-Outcome",
40
+ "28": "B-Personal_background",
41
+ "29": "B-Qualitative_concept",
42
+ "30": "B-Quantitative_concept",
43
+ "31": "B-Severity",
44
+ "32": "B-Sex",
45
+ "33": "B-Shape",
46
+ "34": "B-Sign_symptom",
47
+ "35": "B-Subject",
48
+ "36": "B-Texture",
49
+ "37": "B-Therapeutic_procedure",
50
+ "38": "B-Time",
51
+ "39": "B-Volume",
52
+ "40": "B-Weight",
53
+ "41": "I-Activity",
54
+ "42": "I-Administration",
55
+ "43": "I-Age",
56
+ "44": "I-Area",
57
+ "45": "I-Biological_structure",
58
+ "46": "I-Clinical_event",
59
+ "47": "I-Coreference",
60
+ "48": "I-Date",
61
+ "49": "I-Detailed_description",
62
+ "50": "I-Diagnostic_procedure",
63
+ "51": "I-Disease_disorder",
64
+ "52": "I-Distance",
65
+ "53": "I-Dosage",
66
+ "54": "I-Duration",
67
+ "55": "I-Family_history",
68
+ "56": "I-History",
69
+ "57": "I-Lab_value",
70
+ "58": "I-Mass",
71
+ "59": "I-Medication",
72
+ "60": "I-Nonbiological_location",
73
+ "61": "I-Other_entity",
74
+ "62": "I-Outcome",
75
+ "63": "I-Personal_background",
76
+ "64": "I-Quantitative_concept",
77
+ "65": "I-Severity",
78
+ "66": "I-Sex",
79
+ "67": "I-Sign_symptom",
80
+ "68": "I-Subject",
81
+ "69": "I-Texture",
82
+ "70": "I-Therapeutic_procedure",
83
+ "71": "I-Time",
84
+ "72": "I-Volume",
85
+ "73": "O"
86
+ },
87
+ "initializer_range": 0.02,
88
+ "label2id": {
89
+ "B-Activity": 0,
90
+ "B-Administration": 1,
91
+ "B-Age": 2,
92
+ "B-Area": 3,
93
+ "B-Biological_attribute": 4,
94
+ "B-Biological_structure": 5,
95
+ "B-Clinical_event": 6,
96
+ "B-Color": 7,
97
+ "B-Coreference": 8,
98
+ "B-Date": 9,
99
+ "B-Detailed_description": 10,
100
+ "B-Diagnostic_procedure": 11,
101
+ "B-Disease_disorder": 12,
102
+ "B-Distance": 13,
103
+ "B-Dosage": 14,
104
+ "B-Duration": 15,
105
+ "B-Family_history": 16,
106
+ "B-Frequency": 17,
107
+ "B-Height": 18,
108
+ "B-History": 19,
109
+ "B-Lab_value": 20,
110
+ "B-Mass": 21,
111
+ "B-Medication": 22,
112
+ "B-Nonbiological_location": 23,
113
+ "B-Occupation": 24,
114
+ "B-Other_entity": 25,
115
+ "B-Other_event": 26,
116
+ "B-Outcome": 27,
117
+ "B-Personal_background": 28,
118
+ "B-Qualitative_concept": 29,
119
+ "B-Quantitative_concept": 30,
120
+ "B-Severity": 31,
121
+ "B-Sex": 32,
122
+ "B-Shape": 33,
123
+ "B-Sign_symptom": 34,
124
+ "B-Subject": 35,
125
+ "B-Texture": 36,
126
+ "B-Therapeutic_procedure": 37,
127
+ "B-Time": 38,
128
+ "B-Volume": 39,
129
+ "B-Weight": 40,
130
+ "I-Activity": 41,
131
+ "I-Administration": 42,
132
+ "I-Age": 43,
133
+ "I-Area": 44,
134
+ "I-Biological_structure": 45,
135
+ "I-Clinical_event": 46,
136
+ "I-Coreference": 47,
137
+ "I-Date": 48,
138
+ "I-Detailed_description": 49,
139
+ "I-Diagnostic_procedure": 50,
140
+ "I-Disease_disorder": 51,
141
+ "I-Distance": 52,
142
+ "I-Dosage": 53,
143
+ "I-Duration": 54,
144
+ "I-Family_history": 55,
145
+ "I-History": 56,
146
+ "I-Lab_value": 57,
147
+ "I-Mass": 58,
148
+ "I-Medication": 59,
149
+ "I-Nonbiological_location": 60,
150
+ "I-Other_entity": 61,
151
+ "I-Outcome": 62,
152
+ "I-Personal_background": 63,
153
+ "I-Quantitative_concept": 64,
154
+ "I-Severity": 65,
155
+ "I-Sex": 66,
156
+ "I-Sign_symptom": 67,
157
+ "I-Subject": 68,
158
+ "I-Texture": 69,
159
+ "I-Therapeutic_procedure": 70,
160
+ "I-Time": 71,
161
+ "I-Volume": 72,
162
+ "O": 73
163
+ },
164
+ "max_position_embeddings": 512,
165
+ "model_type": "distilbert",
166
+ "n_heads": 12,
167
+ "n_layers": 6,
168
+ "pad_token_id": 0,
169
+ "qa_dropout": 0.1,
170
+ "seq_classif_dropout": 0.2,
171
+ "sinusoidal_pos_embds": false,
172
+ "tie_weights_": true,
173
+ "torch_dtype": "float32",
174
+ "transformers_version": "4.49.0",
175
+ "vocab_size": 30522
176
+ }
checkpoint-1000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66105cdf50d0239d3bac04e0fa72a22c3ba8b7df6f1842622809e99a35975a29
3
+ size 265691496
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8b3d57ad76b70fe322f7703a6f6cba6bf5f5067fd514ce894dc9d21d18283e4
3
+ size 531440954
checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8915c23050f659e686094053a99d91ccceb84572d20ef244f15f83a250dc71fe
3
+ size 13990
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74b67834aacf6f555d713cdb4b053a4620bd2b12a56cc8d1be6b42a81da28112
3
+ size 1064
checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
checkpoint-1000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,1273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6937354988399073,
3
+ "best_model_checkpoint": "outputs/models/distilbert-clinical-ner/checkpoint-770",
4
+ "epoch": 100.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_f1": 0.019590382902938554,
14
+ "eval_loss": 4.235354900360107,
15
+ "eval_precision": 0.01608187134502924,
16
+ "eval_recall": 0.025056947608200455,
17
+ "eval_runtime": 0.1555,
18
+ "eval_samples_per_second": 128.613,
19
+ "eval_steps_per_second": 12.861,
20
+ "step": 10
21
+ },
22
+ {
23
+ "epoch": 2.0,
24
+ "eval_f1": 0.023399014778325126,
25
+ "eval_loss": 4.040920734405518,
26
+ "eval_precision": 0.02546916890080429,
27
+ "eval_recall": 0.02164009111617312,
28
+ "eval_runtime": 0.1064,
29
+ "eval_samples_per_second": 187.984,
30
+ "eval_steps_per_second": 18.798,
31
+ "step": 20
32
+ },
33
+ {
34
+ "epoch": 3.0,
35
+ "eval_f1": 0.0045045045045045045,
36
+ "eval_loss": 3.6175544261932373,
37
+ "eval_precision": 0.2,
38
+ "eval_recall": 0.002277904328018223,
39
+ "eval_runtime": 0.1035,
40
+ "eval_samples_per_second": 193.236,
41
+ "eval_steps_per_second": 19.324,
42
+ "step": 30
43
+ },
44
+ {
45
+ "epoch": 4.0,
46
+ "eval_f1": 0.0,
47
+ "eval_loss": 3.0632073879241943,
48
+ "eval_precision": 0.0,
49
+ "eval_recall": 0.0,
50
+ "eval_runtime": 0.1056,
51
+ "eval_samples_per_second": 189.304,
52
+ "eval_steps_per_second": 18.93,
53
+ "step": 40
54
+ },
55
+ {
56
+ "epoch": 5.0,
57
+ "grad_norm": 2.285052537918091,
58
+ "learning_rate": 1e-05,
59
+ "loss": 3.7184,
60
+ "step": 50
61
+ },
62
+ {
63
+ "epoch": 5.0,
64
+ "eval_f1": 0.0,
65
+ "eval_loss": 2.6725738048553467,
66
+ "eval_precision": 0.0,
67
+ "eval_recall": 0.0,
68
+ "eval_runtime": 0.1038,
69
+ "eval_samples_per_second": 192.708,
70
+ "eval_steps_per_second": 19.271,
71
+ "step": 50
72
+ },
73
+ {
74
+ "epoch": 6.0,
75
+ "eval_f1": 0.0022753128555176336,
76
+ "eval_loss": 2.3172459602355957,
77
+ "eval_precision": 1.0,
78
+ "eval_recall": 0.0011389521640091116,
79
+ "eval_runtime": 0.1056,
80
+ "eval_samples_per_second": 189.369,
81
+ "eval_steps_per_second": 18.937,
82
+ "step": 60
83
+ },
84
+ {
85
+ "epoch": 7.0,
86
+ "eval_f1": 0.24705882352941175,
87
+ "eval_loss": 2.010183334350586,
88
+ "eval_precision": 0.34854771784232363,
89
+ "eval_recall": 0.19134396355353075,
90
+ "eval_runtime": 0.1045,
91
+ "eval_samples_per_second": 191.384,
92
+ "eval_steps_per_second": 19.138,
93
+ "step": 70
94
+ },
95
+ {
96
+ "epoch": 8.0,
97
+ "eval_f1": 0.3578104138851802,
98
+ "eval_loss": 1.7770382165908813,
99
+ "eval_precision": 0.432258064516129,
100
+ "eval_recall": 0.3052391799544419,
101
+ "eval_runtime": 0.1043,
102
+ "eval_samples_per_second": 191.78,
103
+ "eval_steps_per_second": 19.178,
104
+ "step": 80
105
+ },
106
+ {
107
+ "epoch": 9.0,
108
+ "eval_f1": 0.4306709265175719,
109
+ "eval_loss": 1.578804612159729,
110
+ "eval_precision": 0.49053857350800584,
111
+ "eval_recall": 0.3838268792710706,
112
+ "eval_runtime": 0.1049,
113
+ "eval_samples_per_second": 190.736,
114
+ "eval_steps_per_second": 19.074,
115
+ "step": 90
116
+ },
117
+ {
118
+ "epoch": 10.0,
119
+ "grad_norm": 1.6135512590408325,
120
+ "learning_rate": 2e-05,
121
+ "loss": 2.0341,
122
+ "step": 100
123
+ },
124
+ {
125
+ "epoch": 10.0,
126
+ "eval_f1": 0.5129151291512916,
127
+ "eval_loss": 1.4062659740447998,
128
+ "eval_precision": 0.5574866310160428,
129
+ "eval_recall": 0.47494305239179957,
130
+ "eval_runtime": 0.1053,
131
+ "eval_samples_per_second": 189.876,
132
+ "eval_steps_per_second": 18.988,
133
+ "step": 100
134
+ },
135
+ {
136
+ "epoch": 11.0,
137
+ "eval_f1": 0.5652435357787132,
138
+ "eval_loss": 1.2971620559692383,
139
+ "eval_precision": 0.5987261146496815,
140
+ "eval_recall": 0.5353075170842825,
141
+ "eval_runtime": 0.1046,
142
+ "eval_samples_per_second": 191.215,
143
+ "eval_steps_per_second": 19.122,
144
+ "step": 110
145
+ },
146
+ {
147
+ "epoch": 12.0,
148
+ "eval_f1": 0.5844155844155845,
149
+ "eval_loss": 1.22593092918396,
150
+ "eval_precision": 0.6066176470588235,
151
+ "eval_recall": 0.5637813211845103,
152
+ "eval_runtime": 0.1052,
153
+ "eval_samples_per_second": 190.037,
154
+ "eval_steps_per_second": 19.004,
155
+ "step": 120
156
+ },
157
+ {
158
+ "epoch": 13.0,
159
+ "eval_f1": 0.6020642201834862,
160
+ "eval_loss": 1.1731911897659302,
161
+ "eval_precision": 0.6062355658198614,
162
+ "eval_recall": 0.5979498861047836,
163
+ "eval_runtime": 0.1047,
164
+ "eval_samples_per_second": 190.934,
165
+ "eval_steps_per_second": 19.093,
166
+ "step": 130
167
+ },
168
+ {
169
+ "epoch": 14.0,
170
+ "eval_f1": 0.6093023255813953,
171
+ "eval_loss": 1.1392405033111572,
172
+ "eval_precision": 0.6223277909738717,
173
+ "eval_recall": 0.5968109339407744,
174
+ "eval_runtime": 0.1076,
175
+ "eval_samples_per_second": 185.839,
176
+ "eval_steps_per_second": 18.584,
177
+ "step": 140
178
+ },
179
+ {
180
+ "epoch": 15.0,
181
+ "grad_norm": 1.8851341009140015,
182
+ "learning_rate": 1.888888888888889e-05,
183
+ "loss": 1.0843,
184
+ "step": 150
185
+ },
186
+ {
187
+ "epoch": 15.0,
188
+ "eval_f1": 0.6329849012775842,
189
+ "eval_loss": 1.1109048128128052,
190
+ "eval_precision": 0.6457345971563981,
191
+ "eval_recall": 0.6207289293849658,
192
+ "eval_runtime": 0.1045,
193
+ "eval_samples_per_second": 191.342,
194
+ "eval_steps_per_second": 19.134,
195
+ "step": 150
196
+ },
197
+ {
198
+ "epoch": 16.0,
199
+ "eval_f1": 0.6323103647944412,
200
+ "eval_loss": 1.111675500869751,
201
+ "eval_precision": 0.6431095406360424,
202
+ "eval_recall": 0.621867881548975,
203
+ "eval_runtime": 0.1064,
204
+ "eval_samples_per_second": 187.947,
205
+ "eval_steps_per_second": 18.795,
206
+ "step": 160
207
+ },
208
+ {
209
+ "epoch": 17.0,
210
+ "eval_f1": 0.6406976744186046,
211
+ "eval_loss": 1.1105479001998901,
212
+ "eval_precision": 0.6543942992874109,
213
+ "eval_recall": 0.6275626423690205,
214
+ "eval_runtime": 0.1038,
215
+ "eval_samples_per_second": 192.723,
216
+ "eval_steps_per_second": 19.272,
217
+ "step": 170
218
+ },
219
+ {
220
+ "epoch": 18.0,
221
+ "eval_f1": 0.6418338108882521,
222
+ "eval_loss": 1.1081936359405518,
223
+ "eval_precision": 0.6459054209919262,
224
+ "eval_recall": 0.6378132118451025,
225
+ "eval_runtime": 0.1093,
226
+ "eval_samples_per_second": 182.962,
227
+ "eval_steps_per_second": 18.296,
228
+ "step": 180
229
+ },
230
+ {
231
+ "epoch": 19.0,
232
+ "eval_f1": 0.656629994209612,
233
+ "eval_loss": 1.0905473232269287,
234
+ "eval_precision": 0.6678445229681979,
235
+ "eval_recall": 0.6457858769931663,
236
+ "eval_runtime": 0.1058,
237
+ "eval_samples_per_second": 189.016,
238
+ "eval_steps_per_second": 18.902,
239
+ "step": 190
240
+ },
241
+ {
242
+ "epoch": 20.0,
243
+ "grad_norm": 2.453874111175537,
244
+ "learning_rate": 1.7777777777777777e-05,
245
+ "loss": 0.6709,
246
+ "step": 200
247
+ },
248
+ {
249
+ "epoch": 20.0,
250
+ "eval_f1": 0.6527536231884057,
251
+ "eval_loss": 1.0979650020599365,
252
+ "eval_precision": 0.6646989374262101,
253
+ "eval_recall": 0.6412300683371298,
254
+ "eval_runtime": 0.1051,
255
+ "eval_samples_per_second": 190.38,
256
+ "eval_steps_per_second": 19.038,
257
+ "step": 200
258
+ },
259
+ {
260
+ "epoch": 21.0,
261
+ "eval_f1": 0.660889659156557,
262
+ "eval_loss": 1.1073075532913208,
263
+ "eval_precision": 0.6705744431418523,
264
+ "eval_recall": 0.6514806378132119,
265
+ "eval_runtime": 0.1063,
266
+ "eval_samples_per_second": 188.158,
267
+ "eval_steps_per_second": 18.816,
268
+ "step": 210
269
+ },
270
+ {
271
+ "epoch": 22.0,
272
+ "eval_f1": 0.6520231213872832,
273
+ "eval_loss": 1.109878420829773,
274
+ "eval_precision": 0.6619718309859155,
275
+ "eval_recall": 0.642369020501139,
276
+ "eval_runtime": 0.1108,
277
+ "eval_samples_per_second": 180.582,
278
+ "eval_steps_per_second": 18.058,
279
+ "step": 220
280
+ },
281
+ {
282
+ "epoch": 23.0,
283
+ "eval_f1": 0.6666666666666666,
284
+ "eval_loss": 1.1079308986663818,
285
+ "eval_precision": 0.6838323353293413,
286
+ "eval_recall": 0.6503416856492027,
287
+ "eval_runtime": 0.1051,
288
+ "eval_samples_per_second": 190.371,
289
+ "eval_steps_per_second": 19.037,
290
+ "step": 230
291
+ },
292
+ {
293
+ "epoch": 24.0,
294
+ "eval_f1": 0.6685714285714285,
295
+ "eval_loss": 1.0995490550994873,
296
+ "eval_precision": 0.6708715596330275,
297
+ "eval_recall": 0.6662870159453302,
298
+ "eval_runtime": 0.1076,
299
+ "eval_samples_per_second": 185.917,
300
+ "eval_steps_per_second": 18.592,
301
+ "step": 240
302
+ },
303
+ {
304
+ "epoch": 25.0,
305
+ "grad_norm": 1.9804632663726807,
306
+ "learning_rate": 1.6666666666666667e-05,
307
+ "loss": 0.4563,
308
+ "step": 250
309
+ },
310
+ {
311
+ "epoch": 25.0,
312
+ "eval_f1": 0.6623831775700935,
313
+ "eval_loss": 1.1412475109100342,
314
+ "eval_precision": 0.6798561151079137,
315
+ "eval_recall": 0.6457858769931663,
316
+ "eval_runtime": 0.1043,
317
+ "eval_samples_per_second": 191.672,
318
+ "eval_steps_per_second": 19.167,
319
+ "step": 250
320
+ },
321
+ {
322
+ "epoch": 26.0,
323
+ "eval_f1": 0.6685878962536023,
324
+ "eval_loss": 1.1206316947937012,
325
+ "eval_precision": 0.676779463243874,
326
+ "eval_recall": 0.6605922551252847,
327
+ "eval_runtime": 0.1025,
328
+ "eval_samples_per_second": 195.213,
329
+ "eval_steps_per_second": 19.521,
330
+ "step": 260
331
+ },
332
+ {
333
+ "epoch": 27.0,
334
+ "eval_f1": 0.674013921113689,
335
+ "eval_loss": 1.1223537921905518,
336
+ "eval_precision": 0.6867612293144209,
337
+ "eval_recall": 0.6617312072892938,
338
+ "eval_runtime": 0.1023,
339
+ "eval_samples_per_second": 195.488,
340
+ "eval_steps_per_second": 19.549,
341
+ "step": 270
342
+ },
343
+ {
344
+ "epoch": 28.0,
345
+ "eval_f1": 0.6787595084844938,
346
+ "eval_loss": 1.1268706321716309,
347
+ "eval_precision": 0.6979542719614922,
348
+ "eval_recall": 0.6605922551252847,
349
+ "eval_runtime": 0.107,
350
+ "eval_samples_per_second": 186.844,
351
+ "eval_steps_per_second": 18.684,
352
+ "step": 280
353
+ },
354
+ {
355
+ "epoch": 29.0,
356
+ "eval_f1": 0.6817391304347827,
357
+ "eval_loss": 1.143689751625061,
358
+ "eval_precision": 0.6942148760330579,
359
+ "eval_recall": 0.6697038724373576,
360
+ "eval_runtime": 0.1048,
361
+ "eval_samples_per_second": 190.916,
362
+ "eval_steps_per_second": 19.092,
363
+ "step": 290
364
+ },
365
+ {
366
+ "epoch": 30.0,
367
+ "grad_norm": 1.2570631504058838,
368
+ "learning_rate": 1.555555555555556e-05,
369
+ "loss": 0.3229,
370
+ "step": 300
371
+ },
372
+ {
373
+ "epoch": 30.0,
374
+ "eval_f1": 0.6781807714450201,
375
+ "eval_loss": 1.1411770582199097,
376
+ "eval_precision": 0.6856810244470314,
377
+ "eval_recall": 0.6708428246013668,
378
+ "eval_runtime": 0.1118,
379
+ "eval_samples_per_second": 178.865,
380
+ "eval_steps_per_second": 17.887,
381
+ "step": 300
382
+ },
383
+ {
384
+ "epoch": 31.0,
385
+ "eval_f1": 0.6797235023041475,
386
+ "eval_loss": 1.1665282249450684,
387
+ "eval_precision": 0.6876456876456877,
388
+ "eval_recall": 0.6719817767653758,
389
+ "eval_runtime": 0.1061,
390
+ "eval_samples_per_second": 188.516,
391
+ "eval_steps_per_second": 18.852,
392
+ "step": 310
393
+ },
394
+ {
395
+ "epoch": 32.0,
396
+ "eval_f1": 0.6785714285714286,
397
+ "eval_loss": 1.147858738899231,
398
+ "eval_precision": 0.6864801864801865,
399
+ "eval_recall": 0.6708428246013668,
400
+ "eval_runtime": 0.1079,
401
+ "eval_samples_per_second": 185.36,
402
+ "eval_steps_per_second": 18.536,
403
+ "step": 320
404
+ },
405
+ {
406
+ "epoch": 33.0,
407
+ "eval_f1": 0.6774193548387096,
408
+ "eval_loss": 1.1554282903671265,
409
+ "eval_precision": 0.6853146853146853,
410
+ "eval_recall": 0.6697038724373576,
411
+ "eval_runtime": 0.1037,
412
+ "eval_samples_per_second": 192.925,
413
+ "eval_steps_per_second": 19.292,
414
+ "step": 330
415
+ },
416
+ {
417
+ "epoch": 34.0,
418
+ "eval_f1": 0.6797687861271676,
419
+ "eval_loss": 1.15742027759552,
420
+ "eval_precision": 0.6901408450704225,
421
+ "eval_recall": 0.6697038724373576,
422
+ "eval_runtime": 0.1053,
423
+ "eval_samples_per_second": 189.904,
424
+ "eval_steps_per_second": 18.99,
425
+ "step": 340
426
+ },
427
+ {
428
+ "epoch": 35.0,
429
+ "grad_norm": 1.0945429801940918,
430
+ "learning_rate": 1.4444444444444446e-05,
431
+ "loss": 0.2396,
432
+ "step": 350
433
+ },
434
+ {
435
+ "epoch": 35.0,
436
+ "eval_f1": 0.6797235023041475,
437
+ "eval_loss": 1.1754865646362305,
438
+ "eval_precision": 0.6876456876456877,
439
+ "eval_recall": 0.6719817767653758,
440
+ "eval_runtime": 0.1057,
441
+ "eval_samples_per_second": 189.199,
442
+ "eval_steps_per_second": 18.92,
443
+ "step": 350
444
+ },
445
+ {
446
+ "epoch": 36.0,
447
+ "eval_f1": 0.6735870818915801,
448
+ "eval_loss": 1.1885067224502563,
449
+ "eval_precision": 0.6822429906542056,
450
+ "eval_recall": 0.6651480637813212,
451
+ "eval_runtime": 0.1,
452
+ "eval_samples_per_second": 200.0,
453
+ "eval_steps_per_second": 20.0,
454
+ "step": 360
455
+ },
456
+ {
457
+ "epoch": 37.0,
458
+ "eval_f1": 0.6806526806526807,
459
+ "eval_loss": 1.181584119796753,
460
+ "eval_precision": 0.6968973747016707,
461
+ "eval_recall": 0.6651480637813212,
462
+ "eval_runtime": 0.1076,
463
+ "eval_samples_per_second": 185.858,
464
+ "eval_steps_per_second": 18.586,
465
+ "step": 370
466
+ },
467
+ {
468
+ "epoch": 38.0,
469
+ "eval_f1": 0.6787172011661807,
470
+ "eval_loss": 1.1876161098480225,
471
+ "eval_precision": 0.6953405017921147,
472
+ "eval_recall": 0.662870159453303,
473
+ "eval_runtime": 0.1161,
474
+ "eval_samples_per_second": 172.269,
475
+ "eval_steps_per_second": 17.227,
476
+ "step": 380
477
+ },
478
+ {
479
+ "epoch": 39.0,
480
+ "eval_f1": 0.6818713450292397,
481
+ "eval_loss": 1.2116471529006958,
482
+ "eval_precision": 0.7007211538461539,
483
+ "eval_recall": 0.664009111617312,
484
+ "eval_runtime": 0.1027,
485
+ "eval_samples_per_second": 194.71,
486
+ "eval_steps_per_second": 19.471,
487
+ "step": 390
488
+ },
489
+ {
490
+ "epoch": 40.0,
491
+ "grad_norm": 0.8599975109100342,
492
+ "learning_rate": 1.3333333333333333e-05,
493
+ "loss": 0.1848,
494
+ "step": 400
495
+ },
496
+ {
497
+ "epoch": 40.0,
498
+ "eval_f1": 0.6786542923433874,
499
+ "eval_loss": 1.1997044086456299,
500
+ "eval_precision": 0.6914893617021277,
501
+ "eval_recall": 0.6662870159453302,
502
+ "eval_runtime": 0.1098,
503
+ "eval_samples_per_second": 182.124,
504
+ "eval_steps_per_second": 18.212,
505
+ "step": 400
506
+ },
507
+ {
508
+ "epoch": 41.0,
509
+ "eval_f1": 0.6817917393833625,
510
+ "eval_loss": 1.2281190156936646,
511
+ "eval_precision": 0.6967895362663495,
512
+ "eval_recall": 0.6674259681093394,
513
+ "eval_runtime": 0.1026,
514
+ "eval_samples_per_second": 195.027,
515
+ "eval_steps_per_second": 19.503,
516
+ "step": 410
517
+ },
518
+ {
519
+ "epoch": 42.0,
520
+ "eval_f1": 0.6820542412002308,
521
+ "eval_loss": 1.205224633216858,
522
+ "eval_precision": 0.6912280701754386,
523
+ "eval_recall": 0.673120728929385,
524
+ "eval_runtime": 0.1029,
525
+ "eval_samples_per_second": 194.454,
526
+ "eval_steps_per_second": 19.445,
527
+ "step": 420
528
+ },
529
+ {
530
+ "epoch": 43.0,
531
+ "eval_f1": 0.6834112149532711,
532
+ "eval_loss": 1.2441879510879517,
533
+ "eval_precision": 0.7014388489208633,
534
+ "eval_recall": 0.6662870159453302,
535
+ "eval_runtime": 0.1013,
536
+ "eval_samples_per_second": 197.51,
537
+ "eval_steps_per_second": 19.751,
538
+ "step": 430
539
+ },
540
+ {
541
+ "epoch": 44.0,
542
+ "eval_f1": 0.6762672811059909,
543
+ "eval_loss": 1.2410287857055664,
544
+ "eval_precision": 0.6841491841491841,
545
+ "eval_recall": 0.6685649202733486,
546
+ "eval_runtime": 0.1039,
547
+ "eval_samples_per_second": 192.454,
548
+ "eval_steps_per_second": 19.245,
549
+ "step": 440
550
+ },
551
+ {
552
+ "epoch": 45.0,
553
+ "grad_norm": 0.8666434288024902,
554
+ "learning_rate": 1.2222222222222224e-05,
555
+ "loss": 0.1472,
556
+ "step": 450
557
+ },
558
+ {
559
+ "epoch": 45.0,
560
+ "eval_f1": 0.6809744779582366,
561
+ "eval_loss": 1.2373775243759155,
562
+ "eval_precision": 0.693853427895981,
563
+ "eval_recall": 0.6685649202733486,
564
+ "eval_runtime": 0.1016,
565
+ "eval_samples_per_second": 196.912,
566
+ "eval_steps_per_second": 19.691,
567
+ "step": 450
568
+ },
569
+ {
570
+ "epoch": 46.0,
571
+ "eval_f1": 0.6786961583236321,
572
+ "eval_loss": 1.237067461013794,
573
+ "eval_precision": 0.694047619047619,
574
+ "eval_recall": 0.664009111617312,
575
+ "eval_runtime": 0.1002,
576
+ "eval_samples_per_second": 199.663,
577
+ "eval_steps_per_second": 19.966,
578
+ "step": 460
579
+ },
580
+ {
581
+ "epoch": 47.0,
582
+ "eval_f1": 0.6889016676250718,
583
+ "eval_loss": 1.2270065546035767,
584
+ "eval_precision": 0.6957026713124274,
585
+ "eval_recall": 0.6822323462414579,
586
+ "eval_runtime": 0.1051,
587
+ "eval_samples_per_second": 190.295,
588
+ "eval_steps_per_second": 19.03,
589
+ "step": 470
590
+ },
591
+ {
592
+ "epoch": 48.0,
593
+ "eval_f1": 0.6805555555555555,
594
+ "eval_loss": 1.238765001296997,
595
+ "eval_precision": 0.691764705882353,
596
+ "eval_recall": 0.6697038724373576,
597
+ "eval_runtime": 0.1066,
598
+ "eval_samples_per_second": 187.677,
599
+ "eval_steps_per_second": 18.768,
600
+ "step": 480
601
+ },
602
+ {
603
+ "epoch": 49.0,
604
+ "eval_f1": 0.6790914385556202,
605
+ "eval_loss": 1.2525634765625,
606
+ "eval_precision": 0.6948748510131109,
607
+ "eval_recall": 0.664009111617312,
608
+ "eval_runtime": 0.1021,
609
+ "eval_samples_per_second": 195.942,
610
+ "eval_steps_per_second": 19.594,
611
+ "step": 490
612
+ },
613
+ {
614
+ "epoch": 50.0,
615
+ "grad_norm": 0.8096102476119995,
616
+ "learning_rate": 1.1111111111111113e-05,
617
+ "loss": 0.1184,
618
+ "step": 500
619
+ },
620
+ {
621
+ "epoch": 50.0,
622
+ "eval_f1": 0.681421083284799,
623
+ "eval_loss": 1.2390888929367065,
624
+ "eval_precision": 0.6972586412395709,
625
+ "eval_recall": 0.6662870159453302,
626
+ "eval_runtime": 0.1042,
627
+ "eval_samples_per_second": 191.926,
628
+ "eval_steps_per_second": 19.193,
629
+ "step": 500
630
+ },
631
+ {
632
+ "epoch": 51.0,
633
+ "eval_f1": 0.6782810685249709,
634
+ "eval_loss": 1.2617552280426025,
635
+ "eval_precision": 0.6919431279620853,
636
+ "eval_recall": 0.6651480637813212,
637
+ "eval_runtime": 0.1011,
638
+ "eval_samples_per_second": 197.736,
639
+ "eval_steps_per_second": 19.774,
640
+ "step": 510
641
+ },
642
+ {
643
+ "epoch": 52.0,
644
+ "eval_f1": 0.6814469078179697,
645
+ "eval_loss": 1.2573115825653076,
646
+ "eval_precision": 0.6985645933014354,
647
+ "eval_recall": 0.6651480637813212,
648
+ "eval_runtime": 0.1033,
649
+ "eval_samples_per_second": 193.643,
650
+ "eval_steps_per_second": 19.364,
651
+ "step": 520
652
+ },
653
+ {
654
+ "epoch": 53.0,
655
+ "eval_f1": 0.681369704004643,
656
+ "eval_loss": 1.2631280422210693,
657
+ "eval_precision": 0.6946745562130178,
658
+ "eval_recall": 0.6685649202733486,
659
+ "eval_runtime": 0.1027,
660
+ "eval_samples_per_second": 194.742,
661
+ "eval_steps_per_second": 19.474,
662
+ "step": 530
663
+ },
664
+ {
665
+ "epoch": 54.0,
666
+ "eval_f1": 0.6822429906542057,
667
+ "eval_loss": 1.2612630128860474,
668
+ "eval_precision": 0.7002398081534772,
669
+ "eval_recall": 0.6651480637813212,
670
+ "eval_runtime": 0.1034,
671
+ "eval_samples_per_second": 193.355,
672
+ "eval_steps_per_second": 19.335,
673
+ "step": 540
674
+ },
675
+ {
676
+ "epoch": 55.0,
677
+ "grad_norm": 1.258770227432251,
678
+ "learning_rate": 1e-05,
679
+ "loss": 0.0965,
680
+ "step": 550
681
+ },
682
+ {
683
+ "epoch": 55.0,
684
+ "eval_f1": 0.678592036930179,
685
+ "eval_loss": 1.2565994262695312,
686
+ "eval_precision": 0.6877192982456141,
687
+ "eval_recall": 0.6697038724373576,
688
+ "eval_runtime": 0.1065,
689
+ "eval_samples_per_second": 187.769,
690
+ "eval_steps_per_second": 18.777,
691
+ "step": 550
692
+ },
693
+ {
694
+ "epoch": 56.0,
695
+ "eval_f1": 0.6720461095100864,
696
+ "eval_loss": 1.286030888557434,
697
+ "eval_precision": 0.6802800466744457,
698
+ "eval_recall": 0.664009111617312,
699
+ "eval_runtime": 0.1017,
700
+ "eval_samples_per_second": 196.637,
701
+ "eval_steps_per_second": 19.664,
702
+ "step": 560
703
+ },
704
+ {
705
+ "epoch": 57.0,
706
+ "eval_f1": 0.6774941995359629,
707
+ "eval_loss": 1.2746167182922363,
708
+ "eval_precision": 0.6903073286052009,
709
+ "eval_recall": 0.6651480637813212,
710
+ "eval_runtime": 0.104,
711
+ "eval_samples_per_second": 192.291,
712
+ "eval_steps_per_second": 19.229,
713
+ "step": 570
714
+ },
715
+ {
716
+ "epoch": 58.0,
717
+ "eval_f1": 0.6813441483198146,
718
+ "eval_loss": 1.2719839811325073,
719
+ "eval_precision": 0.6933962264150944,
720
+ "eval_recall": 0.6697038724373576,
721
+ "eval_runtime": 0.1009,
722
+ "eval_samples_per_second": 198.12,
723
+ "eval_steps_per_second": 19.812,
724
+ "step": 580
725
+ },
726
+ {
727
+ "epoch": 59.0,
728
+ "eval_f1": 0.6845168800931315,
729
+ "eval_loss": 1.2790720462799072,
730
+ "eval_precision": 0.7,
731
+ "eval_recall": 0.6697038724373576,
732
+ "eval_runtime": 0.1025,
733
+ "eval_samples_per_second": 195.09,
734
+ "eval_steps_per_second": 19.509,
735
+ "step": 590
736
+ },
737
+ {
738
+ "epoch": 60.0,
739
+ "grad_norm": 0.6421855688095093,
740
+ "learning_rate": 8.888888888888888e-06,
741
+ "loss": 0.0841,
742
+ "step": 600
743
+ },
744
+ {
745
+ "epoch": 60.0,
746
+ "eval_f1": 0.6827309236947792,
747
+ "eval_loss": 1.2661317586898804,
748
+ "eval_precision": 0.6878612716763006,
749
+ "eval_recall": 0.6776765375854215,
750
+ "eval_runtime": 0.1054,
751
+ "eval_samples_per_second": 189.834,
752
+ "eval_steps_per_second": 18.983,
753
+ "step": 600
754
+ },
755
+ {
756
+ "epoch": 61.0,
757
+ "eval_f1": 0.6803039158386909,
758
+ "eval_loss": 1.2873255014419556,
759
+ "eval_precision": 0.6986794717887155,
760
+ "eval_recall": 0.662870159453303,
761
+ "eval_runtime": 0.1028,
762
+ "eval_samples_per_second": 194.643,
763
+ "eval_steps_per_second": 19.464,
764
+ "step": 610
765
+ },
766
+ {
767
+ "epoch": 62.0,
768
+ "eval_f1": 0.6809248554913295,
769
+ "eval_loss": 1.2766045331954956,
770
+ "eval_precision": 0.6913145539906104,
771
+ "eval_recall": 0.6708428246013668,
772
+ "eval_runtime": 0.1044,
773
+ "eval_samples_per_second": 191.487,
774
+ "eval_steps_per_second": 19.149,
775
+ "step": 620
776
+ },
777
+ {
778
+ "epoch": 63.0,
779
+ "eval_f1": 0.6755658734764944,
780
+ "eval_loss": 1.2915081977844238,
781
+ "eval_precision": 0.6887573964497041,
782
+ "eval_recall": 0.662870159453303,
783
+ "eval_runtime": 0.1078,
784
+ "eval_samples_per_second": 185.542,
785
+ "eval_steps_per_second": 18.554,
786
+ "step": 630
787
+ },
788
+ {
789
+ "epoch": 64.0,
790
+ "eval_f1": 0.6732101616628174,
791
+ "eval_loss": 1.2793446779251099,
792
+ "eval_precision": 0.6826697892271663,
793
+ "eval_recall": 0.664009111617312,
794
+ "eval_runtime": 0.1016,
795
+ "eval_samples_per_second": 196.942,
796
+ "eval_steps_per_second": 19.694,
797
+ "step": 640
798
+ },
799
+ {
800
+ "epoch": 65.0,
801
+ "grad_norm": 0.5916463136672974,
802
+ "learning_rate": 7.77777777777778e-06,
803
+ "loss": 0.0714,
804
+ "step": 650
805
+ },
806
+ {
807
+ "epoch": 65.0,
808
+ "eval_f1": 0.6820276497695852,
809
+ "eval_loss": 1.3019025325775146,
810
+ "eval_precision": 0.6899766899766899,
811
+ "eval_recall": 0.6742596810933941,
812
+ "eval_runtime": 0.1008,
813
+ "eval_samples_per_second": 198.393,
814
+ "eval_steps_per_second": 19.839,
815
+ "step": 650
816
+ },
817
+ {
818
+ "epoch": 66.0,
819
+ "eval_f1": 0.6853473438412142,
820
+ "eval_loss": 1.2911741733551025,
821
+ "eval_precision": 0.7029940119760479,
822
+ "eval_recall": 0.6685649202733486,
823
+ "eval_runtime": 0.106,
824
+ "eval_samples_per_second": 188.757,
825
+ "eval_steps_per_second": 18.876,
826
+ "step": 660
827
+ },
828
+ {
829
+ "epoch": 67.0,
830
+ "eval_f1": 0.67816091954023,
831
+ "eval_loss": 1.2967917919158936,
832
+ "eval_precision": 0.6844547563805105,
833
+ "eval_recall": 0.6719817767653758,
834
+ "eval_runtime": 0.1092,
835
+ "eval_samples_per_second": 183.175,
836
+ "eval_steps_per_second": 18.318,
837
+ "step": 670
838
+ },
839
+ {
840
+ "epoch": 68.0,
841
+ "eval_f1": 0.6895348837209303,
842
+ "eval_loss": 1.2946228981018066,
843
+ "eval_precision": 0.7042755344418052,
844
+ "eval_recall": 0.6753986332574032,
845
+ "eval_runtime": 0.1052,
846
+ "eval_samples_per_second": 190.167,
847
+ "eval_steps_per_second": 19.017,
848
+ "step": 680
849
+ },
850
+ {
851
+ "epoch": 69.0,
852
+ "eval_f1": 0.6863425925925927,
853
+ "eval_loss": 1.2976857423782349,
854
+ "eval_precision": 0.6976470588235294,
855
+ "eval_recall": 0.6753986332574032,
856
+ "eval_runtime": 0.103,
857
+ "eval_samples_per_second": 194.22,
858
+ "eval_steps_per_second": 19.422,
859
+ "step": 690
860
+ },
861
+ {
862
+ "epoch": 70.0,
863
+ "grad_norm": 0.7995481491088867,
864
+ "learning_rate": 6.666666666666667e-06,
865
+ "loss": 0.064,
866
+ "step": 700
867
+ },
868
+ {
869
+ "epoch": 70.0,
870
+ "eval_f1": 0.6771014492753623,
871
+ "eval_loss": 1.295568585395813,
872
+ "eval_precision": 0.6894923258559622,
873
+ "eval_recall": 0.6651480637813212,
874
+ "eval_runtime": 0.1031,
875
+ "eval_samples_per_second": 193.92,
876
+ "eval_steps_per_second": 19.392,
877
+ "step": 700
878
+ },
879
+ {
880
+ "epoch": 71.0,
881
+ "eval_f1": 0.6797687861271676,
882
+ "eval_loss": 1.2898850440979004,
883
+ "eval_precision": 0.6901408450704225,
884
+ "eval_recall": 0.6697038724373576,
885
+ "eval_runtime": 0.1005,
886
+ "eval_samples_per_second": 198.942,
887
+ "eval_steps_per_second": 19.894,
888
+ "step": 710
889
+ },
890
+ {
891
+ "epoch": 72.0,
892
+ "eval_f1": 0.6806526806526807,
893
+ "eval_loss": 1.3075172901153564,
894
+ "eval_precision": 0.6968973747016707,
895
+ "eval_recall": 0.6651480637813212,
896
+ "eval_runtime": 0.1087,
897
+ "eval_samples_per_second": 184.017,
898
+ "eval_steps_per_second": 18.402,
899
+ "step": 720
900
+ },
901
+ {
902
+ "epoch": 73.0,
903
+ "eval_f1": 0.683750728013978,
904
+ "eval_loss": 1.2973222732543945,
905
+ "eval_precision": 0.699642431466031,
906
+ "eval_recall": 0.6685649202733486,
907
+ "eval_runtime": 0.1054,
908
+ "eval_samples_per_second": 189.672,
909
+ "eval_steps_per_second": 18.967,
910
+ "step": 730
911
+ },
912
+ {
913
+ "epoch": 74.0,
914
+ "eval_f1": 0.6872812135355894,
915
+ "eval_loss": 1.3102946281433105,
916
+ "eval_precision": 0.7045454545454546,
917
+ "eval_recall": 0.6708428246013668,
918
+ "eval_runtime": 0.1064,
919
+ "eval_samples_per_second": 188.0,
920
+ "eval_steps_per_second": 18.8,
921
+ "step": 740
922
+ },
923
+ {
924
+ "epoch": 75.0,
925
+ "grad_norm": 0.8353786468505859,
926
+ "learning_rate": 5.555555555555557e-06,
927
+ "loss": 0.0575,
928
+ "step": 750
929
+ },
930
+ {
931
+ "epoch": 75.0,
932
+ "eval_f1": 0.6797915460335843,
933
+ "eval_loss": 1.3147099018096924,
934
+ "eval_precision": 0.6914016489988222,
935
+ "eval_recall": 0.6685649202733486,
936
+ "eval_runtime": 0.1049,
937
+ "eval_samples_per_second": 190.589,
938
+ "eval_steps_per_second": 19.059,
939
+ "step": 750
940
+ },
941
+ {
942
+ "epoch": 76.0,
943
+ "eval_f1": 0.6845168800931315,
944
+ "eval_loss": 1.3087732791900635,
945
+ "eval_precision": 0.7,
946
+ "eval_recall": 0.6697038724373576,
947
+ "eval_runtime": 0.105,
948
+ "eval_samples_per_second": 190.4,
949
+ "eval_steps_per_second": 19.04,
950
+ "step": 760
951
+ },
952
+ {
953
+ "epoch": 77.0,
954
+ "eval_f1": 0.6937354988399073,
955
+ "eval_loss": 1.3092721700668335,
956
+ "eval_precision": 0.706855791962175,
957
+ "eval_recall": 0.6810933940774487,
958
+ "eval_runtime": 0.1053,
959
+ "eval_samples_per_second": 189.996,
960
+ "eval_steps_per_second": 19.0,
961
+ "step": 770
962
+ },
963
+ {
964
+ "epoch": 78.0,
965
+ "eval_f1": 0.6890951276102089,
966
+ "eval_loss": 1.3059155941009521,
967
+ "eval_precision": 0.7021276595744681,
968
+ "eval_recall": 0.6765375854214123,
969
+ "eval_runtime": 0.1108,
970
+ "eval_samples_per_second": 180.481,
971
+ "eval_steps_per_second": 18.048,
972
+ "step": 780
973
+ },
974
+ {
975
+ "epoch": 79.0,
976
+ "eval_f1": 0.686012768427162,
977
+ "eval_loss": 1.3059110641479492,
978
+ "eval_precision": 0.6994082840236686,
979
+ "eval_recall": 0.673120728929385,
980
+ "eval_runtime": 0.1188,
981
+ "eval_samples_per_second": 168.352,
982
+ "eval_steps_per_second": 16.835,
983
+ "step": 790
984
+ },
985
+ {
986
+ "epoch": 80.0,
987
+ "grad_norm": 0.9341310858726501,
988
+ "learning_rate": 4.444444444444444e-06,
989
+ "loss": 0.0521,
990
+ "step": 800
991
+ },
992
+ {
993
+ "epoch": 80.0,
994
+ "eval_f1": 0.684393063583815,
995
+ "eval_loss": 1.3110581636428833,
996
+ "eval_precision": 0.6948356807511737,
997
+ "eval_recall": 0.6742596810933941,
998
+ "eval_runtime": 0.1046,
999
+ "eval_samples_per_second": 191.295,
1000
+ "eval_steps_per_second": 19.13,
1001
+ "step": 800
1002
+ },
1003
+ {
1004
+ "epoch": 81.0,
1005
+ "eval_f1": 0.6876824284880326,
1006
+ "eval_loss": 1.3156492710113525,
1007
+ "eval_precision": 0.7053892215568862,
1008
+ "eval_recall": 0.6708428246013668,
1009
+ "eval_runtime": 0.1053,
1010
+ "eval_samples_per_second": 189.894,
1011
+ "eval_steps_per_second": 18.989,
1012
+ "step": 810
1013
+ },
1014
+ {
1015
+ "epoch": 82.0,
1016
+ "eval_f1": 0.6812680115273776,
1017
+ "eval_loss": 1.3226007223129272,
1018
+ "eval_precision": 0.6896149358226371,
1019
+ "eval_recall": 0.673120728929385,
1020
+ "eval_runtime": 0.1059,
1021
+ "eval_samples_per_second": 188.943,
1022
+ "eval_steps_per_second": 18.894,
1023
+ "step": 820
1024
+ },
1025
+ {
1026
+ "epoch": 83.0,
1027
+ "eval_f1": 0.686046511627907,
1028
+ "eval_loss": 1.318352460861206,
1029
+ "eval_precision": 0.7007125890736342,
1030
+ "eval_recall": 0.6719817767653758,
1031
+ "eval_runtime": 0.1064,
1032
+ "eval_samples_per_second": 187.885,
1033
+ "eval_steps_per_second": 18.789,
1034
+ "step": 830
1035
+ },
1036
+ {
1037
+ "epoch": 84.0,
1038
+ "eval_f1": 0.6872451951077461,
1039
+ "eval_loss": 1.3170461654663086,
1040
+ "eval_precision": 0.7032181168057211,
1041
+ "eval_recall": 0.6719817767653758,
1042
+ "eval_runtime": 0.1059,
1043
+ "eval_samples_per_second": 188.868,
1044
+ "eval_steps_per_second": 18.887,
1045
+ "step": 840
1046
+ },
1047
+ {
1048
+ "epoch": 85.0,
1049
+ "grad_norm": 0.3394979238510132,
1050
+ "learning_rate": 3.3333333333333333e-06,
1051
+ "loss": 0.0491,
1052
+ "step": 850
1053
+ },
1054
+ {
1055
+ "epoch": 85.0,
1056
+ "eval_f1": 0.6856810244470314,
1057
+ "eval_loss": 1.3184274435043335,
1058
+ "eval_precision": 0.7011904761904761,
1059
+ "eval_recall": 0.6708428246013668,
1060
+ "eval_runtime": 0.1079,
1061
+ "eval_samples_per_second": 185.331,
1062
+ "eval_steps_per_second": 18.533,
1063
+ "step": 850
1064
+ },
1065
+ {
1066
+ "epoch": 86.0,
1067
+ "eval_f1": 0.6844238563983788,
1068
+ "eval_loss": 1.3142564296722412,
1069
+ "eval_precision": 0.696113074204947,
1070
+ "eval_recall": 0.673120728929385,
1071
+ "eval_runtime": 0.1054,
1072
+ "eval_samples_per_second": 189.669,
1073
+ "eval_steps_per_second": 18.967,
1074
+ "step": 860
1075
+ },
1076
+ {
1077
+ "epoch": 87.0,
1078
+ "eval_f1": 0.6868451688009312,
1079
+ "eval_loss": 1.3147000074386597,
1080
+ "eval_precision": 0.7023809523809523,
1081
+ "eval_recall": 0.6719817767653758,
1082
+ "eval_runtime": 0.1079,
1083
+ "eval_samples_per_second": 185.364,
1084
+ "eval_steps_per_second": 18.536,
1085
+ "step": 870
1086
+ },
1087
+ {
1088
+ "epoch": 88.0,
1089
+ "eval_f1": 0.6852173913043478,
1090
+ "eval_loss": 1.3220981359481812,
1091
+ "eval_precision": 0.6977567886658795,
1092
+ "eval_recall": 0.673120728929385,
1093
+ "eval_runtime": 0.1143,
1094
+ "eval_samples_per_second": 175.042,
1095
+ "eval_steps_per_second": 17.504,
1096
+ "step": 880
1097
+ },
1098
+ {
1099
+ "epoch": 89.0,
1100
+ "eval_f1": 0.6840277777777779,
1101
+ "eval_loss": 1.3263325691223145,
1102
+ "eval_precision": 0.6952941176470588,
1103
+ "eval_recall": 0.673120728929385,
1104
+ "eval_runtime": 0.1098,
1105
+ "eval_samples_per_second": 182.123,
1106
+ "eval_steps_per_second": 18.212,
1107
+ "step": 890
1108
+ },
1109
+ {
1110
+ "epoch": 90.0,
1111
+ "grad_norm": 0.362501323223114,
1112
+ "learning_rate": 2.222222222222222e-06,
1113
+ "loss": 0.0467,
1114
+ "step": 900
1115
+ },
1116
+ {
1117
+ "epoch": 90.0,
1118
+ "eval_f1": 0.6836321573163678,
1119
+ "eval_loss": 1.321544885635376,
1120
+ "eval_precision": 0.6944770857814336,
1121
+ "eval_recall": 0.673120728929385,
1122
+ "eval_runtime": 0.1062,
1123
+ "eval_samples_per_second": 188.379,
1124
+ "eval_steps_per_second": 18.838,
1125
+ "step": 900
1126
+ },
1127
+ {
1128
+ "epoch": 91.0,
1129
+ "eval_f1": 0.6844238563983788,
1130
+ "eval_loss": 1.3238760232925415,
1131
+ "eval_precision": 0.696113074204947,
1132
+ "eval_recall": 0.673120728929385,
1133
+ "eval_runtime": 0.1075,
1134
+ "eval_samples_per_second": 186.029,
1135
+ "eval_steps_per_second": 18.603,
1136
+ "step": 910
1137
+ },
1138
+ {
1139
+ "epoch": 92.0,
1140
+ "eval_f1": 0.6825028968713789,
1141
+ "eval_loss": 1.3248389959335327,
1142
+ "eval_precision": 0.6945754716981132,
1143
+ "eval_recall": 0.6708428246013668,
1144
+ "eval_runtime": 0.1058,
1145
+ "eval_samples_per_second": 189.077,
1146
+ "eval_steps_per_second": 18.908,
1147
+ "step": 920
1148
+ },
1149
+ {
1150
+ "epoch": 93.0,
1151
+ "eval_f1": 0.6801853997682503,
1152
+ "eval_loss": 1.326583743095398,
1153
+ "eval_precision": 0.6922169811320755,
1154
+ "eval_recall": 0.6685649202733486,
1155
+ "eval_runtime": 0.1054,
1156
+ "eval_samples_per_second": 189.799,
1157
+ "eval_steps_per_second": 18.98,
1158
+ "step": 930
1159
+ },
1160
+ {
1161
+ "epoch": 94.0,
1162
+ "eval_f1": 0.6836321573163678,
1163
+ "eval_loss": 1.3237107992172241,
1164
+ "eval_precision": 0.6944770857814336,
1165
+ "eval_recall": 0.673120728929385,
1166
+ "eval_runtime": 0.1053,
1167
+ "eval_samples_per_second": 189.896,
1168
+ "eval_steps_per_second": 18.99,
1169
+ "step": 940
1170
+ },
1171
+ {
1172
+ "epoch": 95.0,
1173
+ "grad_norm": 0.33692023158073425,
1174
+ "learning_rate": 1.111111111111111e-06,
1175
+ "loss": 0.0451,
1176
+ "step": 950
1177
+ },
1178
+ {
1179
+ "epoch": 95.0,
1180
+ "eval_f1": 0.6836321573163678,
1181
+ "eval_loss": 1.323667049407959,
1182
+ "eval_precision": 0.6944770857814336,
1183
+ "eval_recall": 0.673120728929385,
1184
+ "eval_runtime": 0.1052,
1185
+ "eval_samples_per_second": 190.154,
1186
+ "eval_steps_per_second": 19.015,
1187
+ "step": 950
1188
+ },
1189
+ {
1190
+ "epoch": 96.0,
1191
+ "eval_f1": 0.6840579710144927,
1192
+ "eval_loss": 1.3248512744903564,
1193
+ "eval_precision": 0.6965761511216056,
1194
+ "eval_recall": 0.6719817767653758,
1195
+ "eval_runtime": 0.1046,
1196
+ "eval_samples_per_second": 191.223,
1197
+ "eval_steps_per_second": 19.122,
1198
+ "step": 960
1199
+ },
1200
+ {
1201
+ "epoch": 97.0,
1202
+ "eval_f1": 0.6848520023215323,
1203
+ "eval_loss": 1.326843023300171,
1204
+ "eval_precision": 0.6982248520710059,
1205
+ "eval_recall": 0.6719817767653758,
1206
+ "eval_runtime": 0.1043,
1207
+ "eval_samples_per_second": 191.797,
1208
+ "eval_steps_per_second": 19.18,
1209
+ "step": 970
1210
+ },
1211
+ {
1212
+ "epoch": 98.0,
1213
+ "eval_f1": 0.6852173913043478,
1214
+ "eval_loss": 1.3268158435821533,
1215
+ "eval_precision": 0.6977567886658795,
1216
+ "eval_recall": 0.673120728929385,
1217
+ "eval_runtime": 0.1047,
1218
+ "eval_samples_per_second": 190.969,
1219
+ "eval_steps_per_second": 19.097,
1220
+ "step": 980
1221
+ },
1222
+ {
1223
+ "epoch": 99.0,
1224
+ "eval_f1": 0.6848203939745077,
1225
+ "eval_loss": 1.3256183862686157,
1226
+ "eval_precision": 0.6969339622641509,
1227
+ "eval_recall": 0.673120728929385,
1228
+ "eval_runtime": 0.1053,
1229
+ "eval_samples_per_second": 189.91,
1230
+ "eval_steps_per_second": 18.991,
1231
+ "step": 990
1232
+ },
1233
+ {
1234
+ "epoch": 100.0,
1235
+ "grad_norm": 0.3960552215576172,
1236
+ "learning_rate": 0.0,
1237
+ "loss": 0.0445,
1238
+ "step": 1000
1239
+ },
1240
+ {
1241
+ "epoch": 100.0,
1242
+ "eval_f1": 0.6848203939745077,
1243
+ "eval_loss": 1.3252696990966797,
1244
+ "eval_precision": 0.6969339622641509,
1245
+ "eval_recall": 0.673120728929385,
1246
+ "eval_runtime": 0.1112,
1247
+ "eval_samples_per_second": 179.827,
1248
+ "eval_steps_per_second": 17.983,
1249
+ "step": 1000
1250
+ }
1251
+ ],
1252
+ "logging_steps": 50,
1253
+ "max_steps": 1000,
1254
+ "num_input_tokens_seen": 0,
1255
+ "num_train_epochs": 100,
1256
+ "save_steps": 500,
1257
+ "stateful_callbacks": {
1258
+ "TrainerControl": {
1259
+ "args": {
1260
+ "should_epoch_stop": false,
1261
+ "should_evaluate": false,
1262
+ "should_log": false,
1263
+ "should_save": true,
1264
+ "should_training_stop": true
1265
+ },
1266
+ "attributes": {}
1267
+ }
1268
+ },
1269
+ "total_flos": 523292762112000.0,
1270
+ "train_batch_size": 16,
1271
+ "trial_name": null,
1272
+ "trial_params": null
1273
+ }
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:752be49abc948927108e2467a489df708fc2c98f0a2eb2cad2df41eff128d5d9
3
+ size 5304
checkpoint-1000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-770/config.json ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForTokenClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "B-Activity",
13
+ "1": "B-Administration",
14
+ "2": "B-Age",
15
+ "3": "B-Area",
16
+ "4": "B-Biological_attribute",
17
+ "5": "B-Biological_structure",
18
+ "6": "B-Clinical_event",
19
+ "7": "B-Color",
20
+ "8": "B-Coreference",
21
+ "9": "B-Date",
22
+ "10": "B-Detailed_description",
23
+ "11": "B-Diagnostic_procedure",
24
+ "12": "B-Disease_disorder",
25
+ "13": "B-Distance",
26
+ "14": "B-Dosage",
27
+ "15": "B-Duration",
28
+ "16": "B-Family_history",
29
+ "17": "B-Frequency",
30
+ "18": "B-Height",
31
+ "19": "B-History",
32
+ "20": "B-Lab_value",
33
+ "21": "B-Mass",
34
+ "22": "B-Medication",
35
+ "23": "B-Nonbiological_location",
36
+ "24": "B-Occupation",
37
+ "25": "B-Other_entity",
38
+ "26": "B-Other_event",
39
+ "27": "B-Outcome",
40
+ "28": "B-Personal_background",
41
+ "29": "B-Qualitative_concept",
42
+ "30": "B-Quantitative_concept",
43
+ "31": "B-Severity",
44
+ "32": "B-Sex",
45
+ "33": "B-Shape",
46
+ "34": "B-Sign_symptom",
47
+ "35": "B-Subject",
48
+ "36": "B-Texture",
49
+ "37": "B-Therapeutic_procedure",
50
+ "38": "B-Time",
51
+ "39": "B-Volume",
52
+ "40": "B-Weight",
53
+ "41": "I-Activity",
54
+ "42": "I-Administration",
55
+ "43": "I-Age",
56
+ "44": "I-Area",
57
+ "45": "I-Biological_structure",
58
+ "46": "I-Clinical_event",
59
+ "47": "I-Coreference",
60
+ "48": "I-Date",
61
+ "49": "I-Detailed_description",
62
+ "50": "I-Diagnostic_procedure",
63
+ "51": "I-Disease_disorder",
64
+ "52": "I-Distance",
65
+ "53": "I-Dosage",
66
+ "54": "I-Duration",
67
+ "55": "I-Family_history",
68
+ "56": "I-History",
69
+ "57": "I-Lab_value",
70
+ "58": "I-Mass",
71
+ "59": "I-Medication",
72
+ "60": "I-Nonbiological_location",
73
+ "61": "I-Other_entity",
74
+ "62": "I-Outcome",
75
+ "63": "I-Personal_background",
76
+ "64": "I-Quantitative_concept",
77
+ "65": "I-Severity",
78
+ "66": "I-Sex",
79
+ "67": "I-Sign_symptom",
80
+ "68": "I-Subject",
81
+ "69": "I-Texture",
82
+ "70": "I-Therapeutic_procedure",
83
+ "71": "I-Time",
84
+ "72": "I-Volume",
85
+ "73": "O"
86
+ },
87
+ "initializer_range": 0.02,
88
+ "label2id": {
89
+ "B-Activity": 0,
90
+ "B-Administration": 1,
91
+ "B-Age": 2,
92
+ "B-Area": 3,
93
+ "B-Biological_attribute": 4,
94
+ "B-Biological_structure": 5,
95
+ "B-Clinical_event": 6,
96
+ "B-Color": 7,
97
+ "B-Coreference": 8,
98
+ "B-Date": 9,
99
+ "B-Detailed_description": 10,
100
+ "B-Diagnostic_procedure": 11,
101
+ "B-Disease_disorder": 12,
102
+ "B-Distance": 13,
103
+ "B-Dosage": 14,
104
+ "B-Duration": 15,
105
+ "B-Family_history": 16,
106
+ "B-Frequency": 17,
107
+ "B-Height": 18,
108
+ "B-History": 19,
109
+ "B-Lab_value": 20,
110
+ "B-Mass": 21,
111
+ "B-Medication": 22,
112
+ "B-Nonbiological_location": 23,
113
+ "B-Occupation": 24,
114
+ "B-Other_entity": 25,
115
+ "B-Other_event": 26,
116
+ "B-Outcome": 27,
117
+ "B-Personal_background": 28,
118
+ "B-Qualitative_concept": 29,
119
+ "B-Quantitative_concept": 30,
120
+ "B-Severity": 31,
121
+ "B-Sex": 32,
122
+ "B-Shape": 33,
123
+ "B-Sign_symptom": 34,
124
+ "B-Subject": 35,
125
+ "B-Texture": 36,
126
+ "B-Therapeutic_procedure": 37,
127
+ "B-Time": 38,
128
+ "B-Volume": 39,
129
+ "B-Weight": 40,
130
+ "I-Activity": 41,
131
+ "I-Administration": 42,
132
+ "I-Age": 43,
133
+ "I-Area": 44,
134
+ "I-Biological_structure": 45,
135
+ "I-Clinical_event": 46,
136
+ "I-Coreference": 47,
137
+ "I-Date": 48,
138
+ "I-Detailed_description": 49,
139
+ "I-Diagnostic_procedure": 50,
140
+ "I-Disease_disorder": 51,
141
+ "I-Distance": 52,
142
+ "I-Dosage": 53,
143
+ "I-Duration": 54,
144
+ "I-Family_history": 55,
145
+ "I-History": 56,
146
+ "I-Lab_value": 57,
147
+ "I-Mass": 58,
148
+ "I-Medication": 59,
149
+ "I-Nonbiological_location": 60,
150
+ "I-Other_entity": 61,
151
+ "I-Outcome": 62,
152
+ "I-Personal_background": 63,
153
+ "I-Quantitative_concept": 64,
154
+ "I-Severity": 65,
155
+ "I-Sex": 66,
156
+ "I-Sign_symptom": 67,
157
+ "I-Subject": 68,
158
+ "I-Texture": 69,
159
+ "I-Therapeutic_procedure": 70,
160
+ "I-Time": 71,
161
+ "I-Volume": 72,
162
+ "O": 73
163
+ },
164
+ "max_position_embeddings": 512,
165
+ "model_type": "distilbert",
166
+ "n_heads": 12,
167
+ "n_layers": 6,
168
+ "pad_token_id": 0,
169
+ "qa_dropout": 0.1,
170
+ "seq_classif_dropout": 0.2,
171
+ "sinusoidal_pos_embds": false,
172
+ "tie_weights_": true,
173
+ "torch_dtype": "float32",
174
+ "transformers_version": "4.49.0",
175
+ "vocab_size": 30522
176
+ }
checkpoint-770/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:326f38cbbe8b00407d06f53521d4910fcb093e796b288f95c3aef96f06cc66a9
3
+ size 265691496
checkpoint-770/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da32f62356caaccc4e811a59680b274a7aefdd85a8e22e90d77e216351e85b11
3
+ size 531440954
checkpoint-770/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f56a8bf119fed612b50bb9bf41da5743f4e5076a930e56fb6dfdfa4736b2503
3
+ size 13990
checkpoint-770/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f3b3e477747577ebcf7c540abe93633653f5162401d45c09c21e1fd38c550ce
3
+ size 1064
checkpoint-770/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
checkpoint-770/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-770/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-770/trainer_state.json ADDED
@@ -0,0 +1,985 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6937354988399073,
3
+ "best_model_checkpoint": "outputs/models/distilbert-clinical-ner/checkpoint-770",
4
+ "epoch": 77.0,
5
+ "eval_steps": 500,
6
+ "global_step": 770,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_f1": 0.019590382902938554,
14
+ "eval_loss": 4.235354900360107,
15
+ "eval_precision": 0.01608187134502924,
16
+ "eval_recall": 0.025056947608200455,
17
+ "eval_runtime": 0.1555,
18
+ "eval_samples_per_second": 128.613,
19
+ "eval_steps_per_second": 12.861,
20
+ "step": 10
21
+ },
22
+ {
23
+ "epoch": 2.0,
24
+ "eval_f1": 0.023399014778325126,
25
+ "eval_loss": 4.040920734405518,
26
+ "eval_precision": 0.02546916890080429,
27
+ "eval_recall": 0.02164009111617312,
28
+ "eval_runtime": 0.1064,
29
+ "eval_samples_per_second": 187.984,
30
+ "eval_steps_per_second": 18.798,
31
+ "step": 20
32
+ },
33
+ {
34
+ "epoch": 3.0,
35
+ "eval_f1": 0.0045045045045045045,
36
+ "eval_loss": 3.6175544261932373,
37
+ "eval_precision": 0.2,
38
+ "eval_recall": 0.002277904328018223,
39
+ "eval_runtime": 0.1035,
40
+ "eval_samples_per_second": 193.236,
41
+ "eval_steps_per_second": 19.324,
42
+ "step": 30
43
+ },
44
+ {
45
+ "epoch": 4.0,
46
+ "eval_f1": 0.0,
47
+ "eval_loss": 3.0632073879241943,
48
+ "eval_precision": 0.0,
49
+ "eval_recall": 0.0,
50
+ "eval_runtime": 0.1056,
51
+ "eval_samples_per_second": 189.304,
52
+ "eval_steps_per_second": 18.93,
53
+ "step": 40
54
+ },
55
+ {
56
+ "epoch": 5.0,
57
+ "grad_norm": 2.285052537918091,
58
+ "learning_rate": 1e-05,
59
+ "loss": 3.7184,
60
+ "step": 50
61
+ },
62
+ {
63
+ "epoch": 5.0,
64
+ "eval_f1": 0.0,
65
+ "eval_loss": 2.6725738048553467,
66
+ "eval_precision": 0.0,
67
+ "eval_recall": 0.0,
68
+ "eval_runtime": 0.1038,
69
+ "eval_samples_per_second": 192.708,
70
+ "eval_steps_per_second": 19.271,
71
+ "step": 50
72
+ },
73
+ {
74
+ "epoch": 6.0,
75
+ "eval_f1": 0.0022753128555176336,
76
+ "eval_loss": 2.3172459602355957,
77
+ "eval_precision": 1.0,
78
+ "eval_recall": 0.0011389521640091116,
79
+ "eval_runtime": 0.1056,
80
+ "eval_samples_per_second": 189.369,
81
+ "eval_steps_per_second": 18.937,
82
+ "step": 60
83
+ },
84
+ {
85
+ "epoch": 7.0,
86
+ "eval_f1": 0.24705882352941175,
87
+ "eval_loss": 2.010183334350586,
88
+ "eval_precision": 0.34854771784232363,
89
+ "eval_recall": 0.19134396355353075,
90
+ "eval_runtime": 0.1045,
91
+ "eval_samples_per_second": 191.384,
92
+ "eval_steps_per_second": 19.138,
93
+ "step": 70
94
+ },
95
+ {
96
+ "epoch": 8.0,
97
+ "eval_f1": 0.3578104138851802,
98
+ "eval_loss": 1.7770382165908813,
99
+ "eval_precision": 0.432258064516129,
100
+ "eval_recall": 0.3052391799544419,
101
+ "eval_runtime": 0.1043,
102
+ "eval_samples_per_second": 191.78,
103
+ "eval_steps_per_second": 19.178,
104
+ "step": 80
105
+ },
106
+ {
107
+ "epoch": 9.0,
108
+ "eval_f1": 0.4306709265175719,
109
+ "eval_loss": 1.578804612159729,
110
+ "eval_precision": 0.49053857350800584,
111
+ "eval_recall": 0.3838268792710706,
112
+ "eval_runtime": 0.1049,
113
+ "eval_samples_per_second": 190.736,
114
+ "eval_steps_per_second": 19.074,
115
+ "step": 90
116
+ },
117
+ {
118
+ "epoch": 10.0,
119
+ "grad_norm": 1.6135512590408325,
120
+ "learning_rate": 2e-05,
121
+ "loss": 2.0341,
122
+ "step": 100
123
+ },
124
+ {
125
+ "epoch": 10.0,
126
+ "eval_f1": 0.5129151291512916,
127
+ "eval_loss": 1.4062659740447998,
128
+ "eval_precision": 0.5574866310160428,
129
+ "eval_recall": 0.47494305239179957,
130
+ "eval_runtime": 0.1053,
131
+ "eval_samples_per_second": 189.876,
132
+ "eval_steps_per_second": 18.988,
133
+ "step": 100
134
+ },
135
+ {
136
+ "epoch": 11.0,
137
+ "eval_f1": 0.5652435357787132,
138
+ "eval_loss": 1.2971620559692383,
139
+ "eval_precision": 0.5987261146496815,
140
+ "eval_recall": 0.5353075170842825,
141
+ "eval_runtime": 0.1046,
142
+ "eval_samples_per_second": 191.215,
143
+ "eval_steps_per_second": 19.122,
144
+ "step": 110
145
+ },
146
+ {
147
+ "epoch": 12.0,
148
+ "eval_f1": 0.5844155844155845,
149
+ "eval_loss": 1.22593092918396,
150
+ "eval_precision": 0.6066176470588235,
151
+ "eval_recall": 0.5637813211845103,
152
+ "eval_runtime": 0.1052,
153
+ "eval_samples_per_second": 190.037,
154
+ "eval_steps_per_second": 19.004,
155
+ "step": 120
156
+ },
157
+ {
158
+ "epoch": 13.0,
159
+ "eval_f1": 0.6020642201834862,
160
+ "eval_loss": 1.1731911897659302,
161
+ "eval_precision": 0.6062355658198614,
162
+ "eval_recall": 0.5979498861047836,
163
+ "eval_runtime": 0.1047,
164
+ "eval_samples_per_second": 190.934,
165
+ "eval_steps_per_second": 19.093,
166
+ "step": 130
167
+ },
168
+ {
169
+ "epoch": 14.0,
170
+ "eval_f1": 0.6093023255813953,
171
+ "eval_loss": 1.1392405033111572,
172
+ "eval_precision": 0.6223277909738717,
173
+ "eval_recall": 0.5968109339407744,
174
+ "eval_runtime": 0.1076,
175
+ "eval_samples_per_second": 185.839,
176
+ "eval_steps_per_second": 18.584,
177
+ "step": 140
178
+ },
179
+ {
180
+ "epoch": 15.0,
181
+ "grad_norm": 1.8851341009140015,
182
+ "learning_rate": 1.888888888888889e-05,
183
+ "loss": 1.0843,
184
+ "step": 150
185
+ },
186
+ {
187
+ "epoch": 15.0,
188
+ "eval_f1": 0.6329849012775842,
189
+ "eval_loss": 1.1109048128128052,
190
+ "eval_precision": 0.6457345971563981,
191
+ "eval_recall": 0.6207289293849658,
192
+ "eval_runtime": 0.1045,
193
+ "eval_samples_per_second": 191.342,
194
+ "eval_steps_per_second": 19.134,
195
+ "step": 150
196
+ },
197
+ {
198
+ "epoch": 16.0,
199
+ "eval_f1": 0.6323103647944412,
200
+ "eval_loss": 1.111675500869751,
201
+ "eval_precision": 0.6431095406360424,
202
+ "eval_recall": 0.621867881548975,
203
+ "eval_runtime": 0.1064,
204
+ "eval_samples_per_second": 187.947,
205
+ "eval_steps_per_second": 18.795,
206
+ "step": 160
207
+ },
208
+ {
209
+ "epoch": 17.0,
210
+ "eval_f1": 0.6406976744186046,
211
+ "eval_loss": 1.1105479001998901,
212
+ "eval_precision": 0.6543942992874109,
213
+ "eval_recall": 0.6275626423690205,
214
+ "eval_runtime": 0.1038,
215
+ "eval_samples_per_second": 192.723,
216
+ "eval_steps_per_second": 19.272,
217
+ "step": 170
218
+ },
219
+ {
220
+ "epoch": 18.0,
221
+ "eval_f1": 0.6418338108882521,
222
+ "eval_loss": 1.1081936359405518,
223
+ "eval_precision": 0.6459054209919262,
224
+ "eval_recall": 0.6378132118451025,
225
+ "eval_runtime": 0.1093,
226
+ "eval_samples_per_second": 182.962,
227
+ "eval_steps_per_second": 18.296,
228
+ "step": 180
229
+ },
230
+ {
231
+ "epoch": 19.0,
232
+ "eval_f1": 0.656629994209612,
233
+ "eval_loss": 1.0905473232269287,
234
+ "eval_precision": 0.6678445229681979,
235
+ "eval_recall": 0.6457858769931663,
236
+ "eval_runtime": 0.1058,
237
+ "eval_samples_per_second": 189.016,
238
+ "eval_steps_per_second": 18.902,
239
+ "step": 190
240
+ },
241
+ {
242
+ "epoch": 20.0,
243
+ "grad_norm": 2.453874111175537,
244
+ "learning_rate": 1.7777777777777777e-05,
245
+ "loss": 0.6709,
246
+ "step": 200
247
+ },
248
+ {
249
+ "epoch": 20.0,
250
+ "eval_f1": 0.6527536231884057,
251
+ "eval_loss": 1.0979650020599365,
252
+ "eval_precision": 0.6646989374262101,
253
+ "eval_recall": 0.6412300683371298,
254
+ "eval_runtime": 0.1051,
255
+ "eval_samples_per_second": 190.38,
256
+ "eval_steps_per_second": 19.038,
257
+ "step": 200
258
+ },
259
+ {
260
+ "epoch": 21.0,
261
+ "eval_f1": 0.660889659156557,
262
+ "eval_loss": 1.1073075532913208,
263
+ "eval_precision": 0.6705744431418523,
264
+ "eval_recall": 0.6514806378132119,
265
+ "eval_runtime": 0.1063,
266
+ "eval_samples_per_second": 188.158,
267
+ "eval_steps_per_second": 18.816,
268
+ "step": 210
269
+ },
270
+ {
271
+ "epoch": 22.0,
272
+ "eval_f1": 0.6520231213872832,
273
+ "eval_loss": 1.109878420829773,
274
+ "eval_precision": 0.6619718309859155,
275
+ "eval_recall": 0.642369020501139,
276
+ "eval_runtime": 0.1108,
277
+ "eval_samples_per_second": 180.582,
278
+ "eval_steps_per_second": 18.058,
279
+ "step": 220
280
+ },
281
+ {
282
+ "epoch": 23.0,
283
+ "eval_f1": 0.6666666666666666,
284
+ "eval_loss": 1.1079308986663818,
285
+ "eval_precision": 0.6838323353293413,
286
+ "eval_recall": 0.6503416856492027,
287
+ "eval_runtime": 0.1051,
288
+ "eval_samples_per_second": 190.371,
289
+ "eval_steps_per_second": 19.037,
290
+ "step": 230
291
+ },
292
+ {
293
+ "epoch": 24.0,
294
+ "eval_f1": 0.6685714285714285,
295
+ "eval_loss": 1.0995490550994873,
296
+ "eval_precision": 0.6708715596330275,
297
+ "eval_recall": 0.6662870159453302,
298
+ "eval_runtime": 0.1076,
299
+ "eval_samples_per_second": 185.917,
300
+ "eval_steps_per_second": 18.592,
301
+ "step": 240
302
+ },
303
+ {
304
+ "epoch": 25.0,
305
+ "grad_norm": 1.9804632663726807,
306
+ "learning_rate": 1.6666666666666667e-05,
307
+ "loss": 0.4563,
308
+ "step": 250
309
+ },
310
+ {
311
+ "epoch": 25.0,
312
+ "eval_f1": 0.6623831775700935,
313
+ "eval_loss": 1.1412475109100342,
314
+ "eval_precision": 0.6798561151079137,
315
+ "eval_recall": 0.6457858769931663,
316
+ "eval_runtime": 0.1043,
317
+ "eval_samples_per_second": 191.672,
318
+ "eval_steps_per_second": 19.167,
319
+ "step": 250
320
+ },
321
+ {
322
+ "epoch": 26.0,
323
+ "eval_f1": 0.6685878962536023,
324
+ "eval_loss": 1.1206316947937012,
325
+ "eval_precision": 0.676779463243874,
326
+ "eval_recall": 0.6605922551252847,
327
+ "eval_runtime": 0.1025,
328
+ "eval_samples_per_second": 195.213,
329
+ "eval_steps_per_second": 19.521,
330
+ "step": 260
331
+ },
332
+ {
333
+ "epoch": 27.0,
334
+ "eval_f1": 0.674013921113689,
335
+ "eval_loss": 1.1223537921905518,
336
+ "eval_precision": 0.6867612293144209,
337
+ "eval_recall": 0.6617312072892938,
338
+ "eval_runtime": 0.1023,
339
+ "eval_samples_per_second": 195.488,
340
+ "eval_steps_per_second": 19.549,
341
+ "step": 270
342
+ },
343
+ {
344
+ "epoch": 28.0,
345
+ "eval_f1": 0.6787595084844938,
346
+ "eval_loss": 1.1268706321716309,
347
+ "eval_precision": 0.6979542719614922,
348
+ "eval_recall": 0.6605922551252847,
349
+ "eval_runtime": 0.107,
350
+ "eval_samples_per_second": 186.844,
351
+ "eval_steps_per_second": 18.684,
352
+ "step": 280
353
+ },
354
+ {
355
+ "epoch": 29.0,
356
+ "eval_f1": 0.6817391304347827,
357
+ "eval_loss": 1.143689751625061,
358
+ "eval_precision": 0.6942148760330579,
359
+ "eval_recall": 0.6697038724373576,
360
+ "eval_runtime": 0.1048,
361
+ "eval_samples_per_second": 190.916,
362
+ "eval_steps_per_second": 19.092,
363
+ "step": 290
364
+ },
365
+ {
366
+ "epoch": 30.0,
367
+ "grad_norm": 1.2570631504058838,
368
+ "learning_rate": 1.555555555555556e-05,
369
+ "loss": 0.3229,
370
+ "step": 300
371
+ },
372
+ {
373
+ "epoch": 30.0,
374
+ "eval_f1": 0.6781807714450201,
375
+ "eval_loss": 1.1411770582199097,
376
+ "eval_precision": 0.6856810244470314,
377
+ "eval_recall": 0.6708428246013668,
378
+ "eval_runtime": 0.1118,
379
+ "eval_samples_per_second": 178.865,
380
+ "eval_steps_per_second": 17.887,
381
+ "step": 300
382
+ },
383
+ {
384
+ "epoch": 31.0,
385
+ "eval_f1": 0.6797235023041475,
386
+ "eval_loss": 1.1665282249450684,
387
+ "eval_precision": 0.6876456876456877,
388
+ "eval_recall": 0.6719817767653758,
389
+ "eval_runtime": 0.1061,
390
+ "eval_samples_per_second": 188.516,
391
+ "eval_steps_per_second": 18.852,
392
+ "step": 310
393
+ },
394
+ {
395
+ "epoch": 32.0,
396
+ "eval_f1": 0.6785714285714286,
397
+ "eval_loss": 1.147858738899231,
398
+ "eval_precision": 0.6864801864801865,
399
+ "eval_recall": 0.6708428246013668,
400
+ "eval_runtime": 0.1079,
401
+ "eval_samples_per_second": 185.36,
402
+ "eval_steps_per_second": 18.536,
403
+ "step": 320
404
+ },
405
+ {
406
+ "epoch": 33.0,
407
+ "eval_f1": 0.6774193548387096,
408
+ "eval_loss": 1.1554282903671265,
409
+ "eval_precision": 0.6853146853146853,
410
+ "eval_recall": 0.6697038724373576,
411
+ "eval_runtime": 0.1037,
412
+ "eval_samples_per_second": 192.925,
413
+ "eval_steps_per_second": 19.292,
414
+ "step": 330
415
+ },
416
+ {
417
+ "epoch": 34.0,
418
+ "eval_f1": 0.6797687861271676,
419
+ "eval_loss": 1.15742027759552,
420
+ "eval_precision": 0.6901408450704225,
421
+ "eval_recall": 0.6697038724373576,
422
+ "eval_runtime": 0.1053,
423
+ "eval_samples_per_second": 189.904,
424
+ "eval_steps_per_second": 18.99,
425
+ "step": 340
426
+ },
427
+ {
428
+ "epoch": 35.0,
429
+ "grad_norm": 1.0945429801940918,
430
+ "learning_rate": 1.4444444444444446e-05,
431
+ "loss": 0.2396,
432
+ "step": 350
433
+ },
434
+ {
435
+ "epoch": 35.0,
436
+ "eval_f1": 0.6797235023041475,
437
+ "eval_loss": 1.1754865646362305,
438
+ "eval_precision": 0.6876456876456877,
439
+ "eval_recall": 0.6719817767653758,
440
+ "eval_runtime": 0.1057,
441
+ "eval_samples_per_second": 189.199,
442
+ "eval_steps_per_second": 18.92,
443
+ "step": 350
444
+ },
445
+ {
446
+ "epoch": 36.0,
447
+ "eval_f1": 0.6735870818915801,
448
+ "eval_loss": 1.1885067224502563,
449
+ "eval_precision": 0.6822429906542056,
450
+ "eval_recall": 0.6651480637813212,
451
+ "eval_runtime": 0.1,
452
+ "eval_samples_per_second": 200.0,
453
+ "eval_steps_per_second": 20.0,
454
+ "step": 360
455
+ },
456
+ {
457
+ "epoch": 37.0,
458
+ "eval_f1": 0.6806526806526807,
459
+ "eval_loss": 1.181584119796753,
460
+ "eval_precision": 0.6968973747016707,
461
+ "eval_recall": 0.6651480637813212,
462
+ "eval_runtime": 0.1076,
463
+ "eval_samples_per_second": 185.858,
464
+ "eval_steps_per_second": 18.586,
465
+ "step": 370
466
+ },
467
+ {
468
+ "epoch": 38.0,
469
+ "eval_f1": 0.6787172011661807,
470
+ "eval_loss": 1.1876161098480225,
471
+ "eval_precision": 0.6953405017921147,
472
+ "eval_recall": 0.662870159453303,
473
+ "eval_runtime": 0.1161,
474
+ "eval_samples_per_second": 172.269,
475
+ "eval_steps_per_second": 17.227,
476
+ "step": 380
477
+ },
478
+ {
479
+ "epoch": 39.0,
480
+ "eval_f1": 0.6818713450292397,
481
+ "eval_loss": 1.2116471529006958,
482
+ "eval_precision": 0.7007211538461539,
483
+ "eval_recall": 0.664009111617312,
484
+ "eval_runtime": 0.1027,
485
+ "eval_samples_per_second": 194.71,
486
+ "eval_steps_per_second": 19.471,
487
+ "step": 390
488
+ },
489
+ {
490
+ "epoch": 40.0,
491
+ "grad_norm": 0.8599975109100342,
492
+ "learning_rate": 1.3333333333333333e-05,
493
+ "loss": 0.1848,
494
+ "step": 400
495
+ },
496
+ {
497
+ "epoch": 40.0,
498
+ "eval_f1": 0.6786542923433874,
499
+ "eval_loss": 1.1997044086456299,
500
+ "eval_precision": 0.6914893617021277,
501
+ "eval_recall": 0.6662870159453302,
502
+ "eval_runtime": 0.1098,
503
+ "eval_samples_per_second": 182.124,
504
+ "eval_steps_per_second": 18.212,
505
+ "step": 400
506
+ },
507
+ {
508
+ "epoch": 41.0,
509
+ "eval_f1": 0.6817917393833625,
510
+ "eval_loss": 1.2281190156936646,
511
+ "eval_precision": 0.6967895362663495,
512
+ "eval_recall": 0.6674259681093394,
513
+ "eval_runtime": 0.1026,
514
+ "eval_samples_per_second": 195.027,
515
+ "eval_steps_per_second": 19.503,
516
+ "step": 410
517
+ },
518
+ {
519
+ "epoch": 42.0,
520
+ "eval_f1": 0.6820542412002308,
521
+ "eval_loss": 1.205224633216858,
522
+ "eval_precision": 0.6912280701754386,
523
+ "eval_recall": 0.673120728929385,
524
+ "eval_runtime": 0.1029,
525
+ "eval_samples_per_second": 194.454,
526
+ "eval_steps_per_second": 19.445,
527
+ "step": 420
528
+ },
529
+ {
530
+ "epoch": 43.0,
531
+ "eval_f1": 0.6834112149532711,
532
+ "eval_loss": 1.2441879510879517,
533
+ "eval_precision": 0.7014388489208633,
534
+ "eval_recall": 0.6662870159453302,
535
+ "eval_runtime": 0.1013,
536
+ "eval_samples_per_second": 197.51,
537
+ "eval_steps_per_second": 19.751,
538
+ "step": 430
539
+ },
540
+ {
541
+ "epoch": 44.0,
542
+ "eval_f1": 0.6762672811059909,
543
+ "eval_loss": 1.2410287857055664,
544
+ "eval_precision": 0.6841491841491841,
545
+ "eval_recall": 0.6685649202733486,
546
+ "eval_runtime": 0.1039,
547
+ "eval_samples_per_second": 192.454,
548
+ "eval_steps_per_second": 19.245,
549
+ "step": 440
550
+ },
551
+ {
552
+ "epoch": 45.0,
553
+ "grad_norm": 0.8666434288024902,
554
+ "learning_rate": 1.2222222222222224e-05,
555
+ "loss": 0.1472,
556
+ "step": 450
557
+ },
558
+ {
559
+ "epoch": 45.0,
560
+ "eval_f1": 0.6809744779582366,
561
+ "eval_loss": 1.2373775243759155,
562
+ "eval_precision": 0.693853427895981,
563
+ "eval_recall": 0.6685649202733486,
564
+ "eval_runtime": 0.1016,
565
+ "eval_samples_per_second": 196.912,
566
+ "eval_steps_per_second": 19.691,
567
+ "step": 450
568
+ },
569
+ {
570
+ "epoch": 46.0,
571
+ "eval_f1": 0.6786961583236321,
572
+ "eval_loss": 1.237067461013794,
573
+ "eval_precision": 0.694047619047619,
574
+ "eval_recall": 0.664009111617312,
575
+ "eval_runtime": 0.1002,
576
+ "eval_samples_per_second": 199.663,
577
+ "eval_steps_per_second": 19.966,
578
+ "step": 460
579
+ },
580
+ {
581
+ "epoch": 47.0,
582
+ "eval_f1": 0.6889016676250718,
583
+ "eval_loss": 1.2270065546035767,
584
+ "eval_precision": 0.6957026713124274,
585
+ "eval_recall": 0.6822323462414579,
586
+ "eval_runtime": 0.1051,
587
+ "eval_samples_per_second": 190.295,
588
+ "eval_steps_per_second": 19.03,
589
+ "step": 470
590
+ },
591
+ {
592
+ "epoch": 48.0,
593
+ "eval_f1": 0.6805555555555555,
594
+ "eval_loss": 1.238765001296997,
595
+ "eval_precision": 0.691764705882353,
596
+ "eval_recall": 0.6697038724373576,
597
+ "eval_runtime": 0.1066,
598
+ "eval_samples_per_second": 187.677,
599
+ "eval_steps_per_second": 18.768,
600
+ "step": 480
601
+ },
602
+ {
603
+ "epoch": 49.0,
604
+ "eval_f1": 0.6790914385556202,
605
+ "eval_loss": 1.2525634765625,
606
+ "eval_precision": 0.6948748510131109,
607
+ "eval_recall": 0.664009111617312,
608
+ "eval_runtime": 0.1021,
609
+ "eval_samples_per_second": 195.942,
610
+ "eval_steps_per_second": 19.594,
611
+ "step": 490
612
+ },
613
+ {
614
+ "epoch": 50.0,
615
+ "grad_norm": 0.8096102476119995,
616
+ "learning_rate": 1.1111111111111113e-05,
617
+ "loss": 0.1184,
618
+ "step": 500
619
+ },
620
+ {
621
+ "epoch": 50.0,
622
+ "eval_f1": 0.681421083284799,
623
+ "eval_loss": 1.2390888929367065,
624
+ "eval_precision": 0.6972586412395709,
625
+ "eval_recall": 0.6662870159453302,
626
+ "eval_runtime": 0.1042,
627
+ "eval_samples_per_second": 191.926,
628
+ "eval_steps_per_second": 19.193,
629
+ "step": 500
630
+ },
631
+ {
632
+ "epoch": 51.0,
633
+ "eval_f1": 0.6782810685249709,
634
+ "eval_loss": 1.2617552280426025,
635
+ "eval_precision": 0.6919431279620853,
636
+ "eval_recall": 0.6651480637813212,
637
+ "eval_runtime": 0.1011,
638
+ "eval_samples_per_second": 197.736,
639
+ "eval_steps_per_second": 19.774,
640
+ "step": 510
641
+ },
642
+ {
643
+ "epoch": 52.0,
644
+ "eval_f1": 0.6814469078179697,
645
+ "eval_loss": 1.2573115825653076,
646
+ "eval_precision": 0.6985645933014354,
647
+ "eval_recall": 0.6651480637813212,
648
+ "eval_runtime": 0.1033,
649
+ "eval_samples_per_second": 193.643,
650
+ "eval_steps_per_second": 19.364,
651
+ "step": 520
652
+ },
653
+ {
654
+ "epoch": 53.0,
655
+ "eval_f1": 0.681369704004643,
656
+ "eval_loss": 1.2631280422210693,
657
+ "eval_precision": 0.6946745562130178,
658
+ "eval_recall": 0.6685649202733486,
659
+ "eval_runtime": 0.1027,
660
+ "eval_samples_per_second": 194.742,
661
+ "eval_steps_per_second": 19.474,
662
+ "step": 530
663
+ },
664
+ {
665
+ "epoch": 54.0,
666
+ "eval_f1": 0.6822429906542057,
667
+ "eval_loss": 1.2612630128860474,
668
+ "eval_precision": 0.7002398081534772,
669
+ "eval_recall": 0.6651480637813212,
670
+ "eval_runtime": 0.1034,
671
+ "eval_samples_per_second": 193.355,
672
+ "eval_steps_per_second": 19.335,
673
+ "step": 540
674
+ },
675
+ {
676
+ "epoch": 55.0,
677
+ "grad_norm": 1.258770227432251,
678
+ "learning_rate": 1e-05,
679
+ "loss": 0.0965,
680
+ "step": 550
681
+ },
682
+ {
683
+ "epoch": 55.0,
684
+ "eval_f1": 0.678592036930179,
685
+ "eval_loss": 1.2565994262695312,
686
+ "eval_precision": 0.6877192982456141,
687
+ "eval_recall": 0.6697038724373576,
688
+ "eval_runtime": 0.1065,
689
+ "eval_samples_per_second": 187.769,
690
+ "eval_steps_per_second": 18.777,
691
+ "step": 550
692
+ },
693
+ {
694
+ "epoch": 56.0,
695
+ "eval_f1": 0.6720461095100864,
696
+ "eval_loss": 1.286030888557434,
697
+ "eval_precision": 0.6802800466744457,
698
+ "eval_recall": 0.664009111617312,
699
+ "eval_runtime": 0.1017,
700
+ "eval_samples_per_second": 196.637,
701
+ "eval_steps_per_second": 19.664,
702
+ "step": 560
703
+ },
704
+ {
705
+ "epoch": 57.0,
706
+ "eval_f1": 0.6774941995359629,
707
+ "eval_loss": 1.2746167182922363,
708
+ "eval_precision": 0.6903073286052009,
709
+ "eval_recall": 0.6651480637813212,
710
+ "eval_runtime": 0.104,
711
+ "eval_samples_per_second": 192.291,
712
+ "eval_steps_per_second": 19.229,
713
+ "step": 570
714
+ },
715
+ {
716
+ "epoch": 58.0,
717
+ "eval_f1": 0.6813441483198146,
718
+ "eval_loss": 1.2719839811325073,
719
+ "eval_precision": 0.6933962264150944,
720
+ "eval_recall": 0.6697038724373576,
721
+ "eval_runtime": 0.1009,
722
+ "eval_samples_per_second": 198.12,
723
+ "eval_steps_per_second": 19.812,
724
+ "step": 580
725
+ },
726
+ {
727
+ "epoch": 59.0,
728
+ "eval_f1": 0.6845168800931315,
729
+ "eval_loss": 1.2790720462799072,
730
+ "eval_precision": 0.7,
731
+ "eval_recall": 0.6697038724373576,
732
+ "eval_runtime": 0.1025,
733
+ "eval_samples_per_second": 195.09,
734
+ "eval_steps_per_second": 19.509,
735
+ "step": 590
736
+ },
737
+ {
738
+ "epoch": 60.0,
739
+ "grad_norm": 0.6421855688095093,
740
+ "learning_rate": 8.888888888888888e-06,
741
+ "loss": 0.0841,
742
+ "step": 600
743
+ },
744
+ {
745
+ "epoch": 60.0,
746
+ "eval_f1": 0.6827309236947792,
747
+ "eval_loss": 1.2661317586898804,
748
+ "eval_precision": 0.6878612716763006,
749
+ "eval_recall": 0.6776765375854215,
750
+ "eval_runtime": 0.1054,
751
+ "eval_samples_per_second": 189.834,
752
+ "eval_steps_per_second": 18.983,
753
+ "step": 600
754
+ },
755
+ {
756
+ "epoch": 61.0,
757
+ "eval_f1": 0.6803039158386909,
758
+ "eval_loss": 1.2873255014419556,
759
+ "eval_precision": 0.6986794717887155,
760
+ "eval_recall": 0.662870159453303,
761
+ "eval_runtime": 0.1028,
762
+ "eval_samples_per_second": 194.643,
763
+ "eval_steps_per_second": 19.464,
764
+ "step": 610
765
+ },
766
+ {
767
+ "epoch": 62.0,
768
+ "eval_f1": 0.6809248554913295,
769
+ "eval_loss": 1.2766045331954956,
770
+ "eval_precision": 0.6913145539906104,
771
+ "eval_recall": 0.6708428246013668,
772
+ "eval_runtime": 0.1044,
773
+ "eval_samples_per_second": 191.487,
774
+ "eval_steps_per_second": 19.149,
775
+ "step": 620
776
+ },
777
+ {
778
+ "epoch": 63.0,
779
+ "eval_f1": 0.6755658734764944,
780
+ "eval_loss": 1.2915081977844238,
781
+ "eval_precision": 0.6887573964497041,
782
+ "eval_recall": 0.662870159453303,
783
+ "eval_runtime": 0.1078,
784
+ "eval_samples_per_second": 185.542,
785
+ "eval_steps_per_second": 18.554,
786
+ "step": 630
787
+ },
788
+ {
789
+ "epoch": 64.0,
790
+ "eval_f1": 0.6732101616628174,
791
+ "eval_loss": 1.2793446779251099,
792
+ "eval_precision": 0.6826697892271663,
793
+ "eval_recall": 0.664009111617312,
794
+ "eval_runtime": 0.1016,
795
+ "eval_samples_per_second": 196.942,
796
+ "eval_steps_per_second": 19.694,
797
+ "step": 640
798
+ },
799
+ {
800
+ "epoch": 65.0,
801
+ "grad_norm": 0.5916463136672974,
802
+ "learning_rate": 7.77777777777778e-06,
803
+ "loss": 0.0714,
804
+ "step": 650
805
+ },
806
+ {
807
+ "epoch": 65.0,
808
+ "eval_f1": 0.6820276497695852,
809
+ "eval_loss": 1.3019025325775146,
810
+ "eval_precision": 0.6899766899766899,
811
+ "eval_recall": 0.6742596810933941,
812
+ "eval_runtime": 0.1008,
813
+ "eval_samples_per_second": 198.393,
814
+ "eval_steps_per_second": 19.839,
815
+ "step": 650
816
+ },
817
+ {
818
+ "epoch": 66.0,
819
+ "eval_f1": 0.6853473438412142,
820
+ "eval_loss": 1.2911741733551025,
821
+ "eval_precision": 0.7029940119760479,
822
+ "eval_recall": 0.6685649202733486,
823
+ "eval_runtime": 0.106,
824
+ "eval_samples_per_second": 188.757,
825
+ "eval_steps_per_second": 18.876,
826
+ "step": 660
827
+ },
828
+ {
829
+ "epoch": 67.0,
830
+ "eval_f1": 0.67816091954023,
831
+ "eval_loss": 1.2967917919158936,
832
+ "eval_precision": 0.6844547563805105,
833
+ "eval_recall": 0.6719817767653758,
834
+ "eval_runtime": 0.1092,
835
+ "eval_samples_per_second": 183.175,
836
+ "eval_steps_per_second": 18.318,
837
+ "step": 670
838
+ },
839
+ {
840
+ "epoch": 68.0,
841
+ "eval_f1": 0.6895348837209303,
842
+ "eval_loss": 1.2946228981018066,
843
+ "eval_precision": 0.7042755344418052,
844
+ "eval_recall": 0.6753986332574032,
845
+ "eval_runtime": 0.1052,
846
+ "eval_samples_per_second": 190.167,
847
+ "eval_steps_per_second": 19.017,
848
+ "step": 680
849
+ },
850
+ {
851
+ "epoch": 69.0,
852
+ "eval_f1": 0.6863425925925927,
853
+ "eval_loss": 1.2976857423782349,
854
+ "eval_precision": 0.6976470588235294,
855
+ "eval_recall": 0.6753986332574032,
856
+ "eval_runtime": 0.103,
857
+ "eval_samples_per_second": 194.22,
858
+ "eval_steps_per_second": 19.422,
859
+ "step": 690
860
+ },
861
+ {
862
+ "epoch": 70.0,
863
+ "grad_norm": 0.7995481491088867,
864
+ "learning_rate": 6.666666666666667e-06,
865
+ "loss": 0.064,
866
+ "step": 700
867
+ },
868
+ {
869
+ "epoch": 70.0,
870
+ "eval_f1": 0.6771014492753623,
871
+ "eval_loss": 1.295568585395813,
872
+ "eval_precision": 0.6894923258559622,
873
+ "eval_recall": 0.6651480637813212,
874
+ "eval_runtime": 0.1031,
875
+ "eval_samples_per_second": 193.92,
876
+ "eval_steps_per_second": 19.392,
877
+ "step": 700
878
+ },
879
+ {
880
+ "epoch": 71.0,
881
+ "eval_f1": 0.6797687861271676,
882
+ "eval_loss": 1.2898850440979004,
883
+ "eval_precision": 0.6901408450704225,
884
+ "eval_recall": 0.6697038724373576,
885
+ "eval_runtime": 0.1005,
886
+ "eval_samples_per_second": 198.942,
887
+ "eval_steps_per_second": 19.894,
888
+ "step": 710
889
+ },
890
+ {
891
+ "epoch": 72.0,
892
+ "eval_f1": 0.6806526806526807,
893
+ "eval_loss": 1.3075172901153564,
894
+ "eval_precision": 0.6968973747016707,
895
+ "eval_recall": 0.6651480637813212,
896
+ "eval_runtime": 0.1087,
897
+ "eval_samples_per_second": 184.017,
898
+ "eval_steps_per_second": 18.402,
899
+ "step": 720
900
+ },
901
+ {
902
+ "epoch": 73.0,
903
+ "eval_f1": 0.683750728013978,
904
+ "eval_loss": 1.2973222732543945,
905
+ "eval_precision": 0.699642431466031,
906
+ "eval_recall": 0.6685649202733486,
907
+ "eval_runtime": 0.1054,
908
+ "eval_samples_per_second": 189.672,
909
+ "eval_steps_per_second": 18.967,
910
+ "step": 730
911
+ },
912
+ {
913
+ "epoch": 74.0,
914
+ "eval_f1": 0.6872812135355894,
915
+ "eval_loss": 1.3102946281433105,
916
+ "eval_precision": 0.7045454545454546,
917
+ "eval_recall": 0.6708428246013668,
918
+ "eval_runtime": 0.1064,
919
+ "eval_samples_per_second": 188.0,
920
+ "eval_steps_per_second": 18.8,
921
+ "step": 740
922
+ },
923
+ {
924
+ "epoch": 75.0,
925
+ "grad_norm": 0.8353786468505859,
926
+ "learning_rate": 5.555555555555557e-06,
927
+ "loss": 0.0575,
928
+ "step": 750
929
+ },
930
+ {
931
+ "epoch": 75.0,
932
+ "eval_f1": 0.6797915460335843,
933
+ "eval_loss": 1.3147099018096924,
934
+ "eval_precision": 0.6914016489988222,
935
+ "eval_recall": 0.6685649202733486,
936
+ "eval_runtime": 0.1049,
937
+ "eval_samples_per_second": 190.589,
938
+ "eval_steps_per_second": 19.059,
939
+ "step": 750
940
+ },
941
+ {
942
+ "epoch": 76.0,
943
+ "eval_f1": 0.6845168800931315,
944
+ "eval_loss": 1.3087732791900635,
945
+ "eval_precision": 0.7,
946
+ "eval_recall": 0.6697038724373576,
947
+ "eval_runtime": 0.105,
948
+ "eval_samples_per_second": 190.4,
949
+ "eval_steps_per_second": 19.04,
950
+ "step": 760
951
+ },
952
+ {
953
+ "epoch": 77.0,
954
+ "eval_f1": 0.6937354988399073,
955
+ "eval_loss": 1.3092721700668335,
956
+ "eval_precision": 0.706855791962175,
957
+ "eval_recall": 0.6810933940774487,
958
+ "eval_runtime": 0.1053,
959
+ "eval_samples_per_second": 189.996,
960
+ "eval_steps_per_second": 19.0,
961
+ "step": 770
962
+ }
963
+ ],
964
+ "logging_steps": 50,
965
+ "max_steps": 1000,
966
+ "num_input_tokens_seen": 0,
967
+ "num_train_epochs": 100,
968
+ "save_steps": 500,
969
+ "stateful_callbacks": {
970
+ "TrainerControl": {
971
+ "args": {
972
+ "should_epoch_stop": false,
973
+ "should_evaluate": false,
974
+ "should_log": false,
975
+ "should_save": true,
976
+ "should_training_stop": false
977
+ },
978
+ "attributes": {}
979
+ }
980
+ },
981
+ "total_flos": 402935426826240.0,
982
+ "train_batch_size": 16,
983
+ "trial_name": null,
984
+ "trial_params": null
985
+ }
checkpoint-770/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:752be49abc948927108e2467a489df708fc2c98f0a2eb2cad2df41eff128d5d9
3
+ size 5304
checkpoint-770/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForTokenClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "B-Activity",
13
+ "1": "B-Administration",
14
+ "2": "B-Age",
15
+ "3": "B-Area",
16
+ "4": "B-Biological_attribute",
17
+ "5": "B-Biological_structure",
18
+ "6": "B-Clinical_event",
19
+ "7": "B-Color",
20
+ "8": "B-Coreference",
21
+ "9": "B-Date",
22
+ "10": "B-Detailed_description",
23
+ "11": "B-Diagnostic_procedure",
24
+ "12": "B-Disease_disorder",
25
+ "13": "B-Distance",
26
+ "14": "B-Dosage",
27
+ "15": "B-Duration",
28
+ "16": "B-Family_history",
29
+ "17": "B-Frequency",
30
+ "18": "B-Height",
31
+ "19": "B-History",
32
+ "20": "B-Lab_value",
33
+ "21": "B-Mass",
34
+ "22": "B-Medication",
35
+ "23": "B-Nonbiological_location",
36
+ "24": "B-Occupation",
37
+ "25": "B-Other_entity",
38
+ "26": "B-Other_event",
39
+ "27": "B-Outcome",
40
+ "28": "B-Personal_background",
41
+ "29": "B-Qualitative_concept",
42
+ "30": "B-Quantitative_concept",
43
+ "31": "B-Severity",
44
+ "32": "B-Sex",
45
+ "33": "B-Shape",
46
+ "34": "B-Sign_symptom",
47
+ "35": "B-Subject",
48
+ "36": "B-Texture",
49
+ "37": "B-Therapeutic_procedure",
50
+ "38": "B-Time",
51
+ "39": "B-Volume",
52
+ "40": "B-Weight",
53
+ "41": "I-Activity",
54
+ "42": "I-Administration",
55
+ "43": "I-Age",
56
+ "44": "I-Area",
57
+ "45": "I-Biological_structure",
58
+ "46": "I-Clinical_event",
59
+ "47": "I-Coreference",
60
+ "48": "I-Date",
61
+ "49": "I-Detailed_description",
62
+ "50": "I-Diagnostic_procedure",
63
+ "51": "I-Disease_disorder",
64
+ "52": "I-Distance",
65
+ "53": "I-Dosage",
66
+ "54": "I-Duration",
67
+ "55": "I-Family_history",
68
+ "56": "I-History",
69
+ "57": "I-Lab_value",
70
+ "58": "I-Mass",
71
+ "59": "I-Medication",
72
+ "60": "I-Nonbiological_location",
73
+ "61": "I-Other_entity",
74
+ "62": "I-Outcome",
75
+ "63": "I-Personal_background",
76
+ "64": "I-Quantitative_concept",
77
+ "65": "I-Severity",
78
+ "66": "I-Sex",
79
+ "67": "I-Sign_symptom",
80
+ "68": "I-Subject",
81
+ "69": "I-Texture",
82
+ "70": "I-Therapeutic_procedure",
83
+ "71": "I-Time",
84
+ "72": "I-Volume",
85
+ "73": "O"
86
+ },
87
+ "initializer_range": 0.02,
88
+ "label2id": {
89
+ "B-Activity": 0,
90
+ "B-Administration": 1,
91
+ "B-Age": 2,
92
+ "B-Area": 3,
93
+ "B-Biological_attribute": 4,
94
+ "B-Biological_structure": 5,
95
+ "B-Clinical_event": 6,
96
+ "B-Color": 7,
97
+ "B-Coreference": 8,
98
+ "B-Date": 9,
99
+ "B-Detailed_description": 10,
100
+ "B-Diagnostic_procedure": 11,
101
+ "B-Disease_disorder": 12,
102
+ "B-Distance": 13,
103
+ "B-Dosage": 14,
104
+ "B-Duration": 15,
105
+ "B-Family_history": 16,
106
+ "B-Frequency": 17,
107
+ "B-Height": 18,
108
+ "B-History": 19,
109
+ "B-Lab_value": 20,
110
+ "B-Mass": 21,
111
+ "B-Medication": 22,
112
+ "B-Nonbiological_location": 23,
113
+ "B-Occupation": 24,
114
+ "B-Other_entity": 25,
115
+ "B-Other_event": 26,
116
+ "B-Outcome": 27,
117
+ "B-Personal_background": 28,
118
+ "B-Qualitative_concept": 29,
119
+ "B-Quantitative_concept": 30,
120
+ "B-Severity": 31,
121
+ "B-Sex": 32,
122
+ "B-Shape": 33,
123
+ "B-Sign_symptom": 34,
124
+ "B-Subject": 35,
125
+ "B-Texture": 36,
126
+ "B-Therapeutic_procedure": 37,
127
+ "B-Time": 38,
128
+ "B-Volume": 39,
129
+ "B-Weight": 40,
130
+ "I-Activity": 41,
131
+ "I-Administration": 42,
132
+ "I-Age": 43,
133
+ "I-Area": 44,
134
+ "I-Biological_structure": 45,
135
+ "I-Clinical_event": 46,
136
+ "I-Coreference": 47,
137
+ "I-Date": 48,
138
+ "I-Detailed_description": 49,
139
+ "I-Diagnostic_procedure": 50,
140
+ "I-Disease_disorder": 51,
141
+ "I-Distance": 52,
142
+ "I-Dosage": 53,
143
+ "I-Duration": 54,
144
+ "I-Family_history": 55,
145
+ "I-History": 56,
146
+ "I-Lab_value": 57,
147
+ "I-Mass": 58,
148
+ "I-Medication": 59,
149
+ "I-Nonbiological_location": 60,
150
+ "I-Other_entity": 61,
151
+ "I-Outcome": 62,
152
+ "I-Personal_background": 63,
153
+ "I-Quantitative_concept": 64,
154
+ "I-Severity": 65,
155
+ "I-Sex": 66,
156
+ "I-Sign_symptom": 67,
157
+ "I-Subject": 68,
158
+ "I-Texture": 69,
159
+ "I-Therapeutic_procedure": 70,
160
+ "I-Time": 71,
161
+ "I-Volume": 72,
162
+ "O": 73
163
+ },
164
+ "max_position_embeddings": 512,
165
+ "model_type": "distilbert",
166
+ "n_heads": 12,
167
+ "n_layers": 6,
168
+ "pad_token_id": 0,
169
+ "qa_dropout": 0.1,
170
+ "seq_classif_dropout": 0.2,
171
+ "sinusoidal_pos_embds": false,
172
+ "tie_weights_": true,
173
+ "torch_dtype": "float32",
174
+ "transformers_version": "4.49.0",
175
+ "vocab_size": 30522
176
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:326f38cbbe8b00407d06f53521d4910fcb093e796b288f95c3aef96f06cc66a9
3
+ size 265691496
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:752be49abc948927108e2467a489df708fc2c98f0a2eb2cad2df41eff128d5d9
3
+ size 5304
vocab.txt ADDED
The diff for this file is too large to render. See raw diff