LamaDiab committed on
Commit
c8b08e7
·
1 Parent(s): 8b3a4e3

Training checkpoint - Epoch 3, Step 61224

Browse files
checkpoint-61224/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoint-61224/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 384,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 1536,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 6,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.53.3",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
checkpoint-61224/config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "5.2.0",
4
+ "transformers": "4.53.3",
5
+ "pytorch": "2.6.0+cu124"
6
+ },
7
+ "model_type": "SentenceTransformer",
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
checkpoint-61224/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
checkpoint-61224/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 256,
3
+ "do_lower_case": false
4
+ }
checkpoint-61224/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-61224/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-61224/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "max_length": 128,
51
+ "model_max_length": 256,
52
+ "never_split": null,
53
+ "pad_to_multiple_of": null,
54
+ "pad_token": "[PAD]",
55
+ "pad_token_type_id": 0,
56
+ "padding_side": "right",
57
+ "sep_token": "[SEP]",
58
+ "stride": 0,
59
+ "strip_accents": null,
60
+ "tokenize_chinese_chars": true,
61
+ "tokenizer_class": "BertTokenizer",
62
+ "truncation_side": "right",
63
+ "truncation_strategy": "longest_first",
64
+ "unk_token": "[UNK]"
65
+ }
checkpoint-61224/trainer_state.json ADDED
@@ -0,0 +1,731 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
+ "eval_steps": 2000,
7
+ "global_step": 61224,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 4.9000392003136025e-05,
14
+ "grad_norm": null,
15
+ "learning_rate": 0.0,
16
+ "loss": 146.2524,
17
+ "step": 1
18
+ },
19
+ {
20
+ "epoch": 0.09800078400627205,
21
+ "grad_norm": 0.11796475946903229,
22
+ "learning_rate": 2.4424301812836847e-06,
23
+ "loss": 32.229,
24
+ "step": 2000
25
+ },
26
+ {
27
+ "epoch": 0.09800078400627205,
28
+ "eval_cosine_accuracy": 0.6100063465199915,
29
+ "eval_cosine_accuracy_threshold": 0.9997795820236206,
30
+ "eval_cosine_ap": 0.6786677205526788,
31
+ "eval_cosine_f1": 0.6667136862150438,
32
+ "eval_cosine_f1_threshold": 0.991624653339386,
33
+ "eval_cosine_mcc": 0.00975822629781094,
34
+ "eval_cosine_precision": 0.5000793524837327,
35
+ "eval_cosine_recall": 0.9998942246668077,
36
+ "eval_loss": 26.897747039794922,
37
+ "eval_runtime": 22.9309,
38
+ "eval_samples_per_second": 824.566,
39
+ "eval_steps_per_second": 1.614,
40
+ "step": 2000
41
+ },
42
+ {
43
+ "epoch": 0.1960015680125441,
44
+ "grad_norm": 0.013558157719671726,
45
+ "learning_rate": 4.892209701126899e-06,
46
+ "loss": 0.0109,
47
+ "step": 4000
48
+ },
49
+ {
50
+ "epoch": 0.1960015680125441,
51
+ "eval_cosine_accuracy": 0.6240215781679712,
52
+ "eval_cosine_accuracy_threshold": 0.9999226331710815,
53
+ "eval_cosine_ap": 0.6923415808404929,
54
+ "eval_cosine_f1": 0.6666196537498678,
55
+ "eval_cosine_f1_threshold": 0.9939442873001099,
56
+ "eval_cosine_mcc": -0.007272583024577166,
57
+ "eval_cosine_precision": 0.4999735547680753,
58
+ "eval_cosine_recall": 0.9998942246668077,
59
+ "eval_loss": 27.12621307373047,
60
+ "eval_runtime": 23.1359,
61
+ "eval_samples_per_second": 817.257,
62
+ "eval_steps_per_second": 1.599,
63
+ "step": 4000
64
+ },
65
+ {
66
+ "epoch": 0.29400235201881614,
67
+ "grad_norm": 0.0,
68
+ "learning_rate": 4.87678916827853e-06,
69
+ "loss": 0.5788,
70
+ "step": 6000
71
+ },
72
+ {
73
+ "epoch": 0.29400235201881614,
74
+ "eval_cosine_accuracy": 0.5835096255553205,
75
+ "eval_cosine_accuracy_threshold": 0.9478387236595154,
76
+ "eval_cosine_ap": 0.6313955266297183,
77
+ "eval_cosine_f1": 0.6666901731250661,
78
+ "eval_cosine_f1_threshold": -0.09354762732982635,
79
+ "eval_cosine_mcc": 0.007272583024577166,
80
+ "eval_cosine_precision": 0.5000264452319246,
81
+ "eval_cosine_recall": 1.0,
82
+ "eval_loss": 32.61564254760742,
83
+ "eval_runtime": 23.1255,
84
+ "eval_samples_per_second": 817.626,
85
+ "eval_steps_per_second": 1.6,
86
+ "step": 6000
87
+ },
88
+ {
89
+ "epoch": 0.3920031360250882,
90
+ "grad_norm": 0.0,
91
+ "learning_rate": 4.747840103159252e-06,
92
+ "loss": 0.0,
93
+ "step": 8000
94
+ },
95
+ {
96
+ "epoch": 0.3920031360250882,
97
+ "eval_cosine_accuracy": 0.5764226782314364,
98
+ "eval_cosine_accuracy_threshold": 0.953868567943573,
99
+ "eval_cosine_ap": 0.6184434211752825,
100
+ "eval_cosine_f1": 0.6666431593794077,
101
+ "eval_cosine_f1_threshold": -0.09449770301580429,
102
+ "eval_cosine_mcc": 0.0,
103
+ "eval_cosine_precision": 0.5,
104
+ "eval_cosine_recall": 0.9998942246668077,
105
+ "eval_loss": 33.463409423828125,
106
+ "eval_runtime": 24.245,
107
+ "eval_samples_per_second": 779.873,
108
+ "eval_steps_per_second": 1.526,
109
+ "step": 8000
110
+ },
111
+ {
112
+ "epoch": 0.49000392003136023,
113
+ "grad_norm": 0.0,
114
+ "learning_rate": 4.618891038039975e-06,
115
+ "loss": 0.0,
116
+ "step": 10000
117
+ },
118
+ {
119
+ "epoch": 0.49000392003136023,
120
+ "eval_cosine_accuracy": 0.5845673788872435,
121
+ "eval_cosine_accuracy_threshold": 0.9387475848197937,
122
+ "eval_cosine_ap": 0.6273588632357191,
123
+ "eval_cosine_f1": 0.6666196537498678,
124
+ "eval_cosine_f1_threshold": -0.1685822755098343,
125
+ "eval_cosine_mcc": -0.007272583024577166,
126
+ "eval_cosine_precision": 0.4999735547680753,
127
+ "eval_cosine_recall": 0.9998942246668077,
128
+ "eval_loss": 35.26945495605469,
129
+ "eval_runtime": 23.2073,
130
+ "eval_samples_per_second": 814.742,
131
+ "eval_steps_per_second": 1.594,
132
+ "step": 10000
133
+ },
134
+ {
135
+ "epoch": 0.5880047040376323,
136
+ "grad_norm": 0.0,
137
+ "learning_rate": 4.489941972920697e-06,
138
+ "loss": 0.0,
139
+ "step": 12000
140
+ },
141
+ {
142
+ "epoch": 0.5880047040376323,
143
+ "eval_cosine_accuracy": 0.5943515972075312,
144
+ "eval_cosine_accuracy_threshold": 0.9397846460342407,
145
+ "eval_cosine_ap": 0.6521863345575756,
146
+ "eval_cosine_f1": 0.6666196537498678,
147
+ "eval_cosine_f1_threshold": -0.17127826809883118,
148
+ "eval_cosine_mcc": -0.007272583024577166,
149
+ "eval_cosine_precision": 0.4999735547680753,
150
+ "eval_cosine_recall": 0.9998942246668077,
151
+ "eval_loss": 30.115076065063477,
152
+ "eval_runtime": 23.3715,
153
+ "eval_samples_per_second": 809.021,
154
+ "eval_steps_per_second": 1.583,
155
+ "step": 12000
156
+ },
157
+ {
158
+ "epoch": 0.6860054880439044,
159
+ "grad_norm": 0.0,
160
+ "learning_rate": 4.360992907801419e-06,
161
+ "loss": 0.0,
162
+ "step": 14000
163
+ },
164
+ {
165
+ "epoch": 0.6860054880439044,
166
+ "eval_cosine_accuracy": 0.5931351808758197,
167
+ "eval_cosine_accuracy_threshold": 0.9391266107559204,
168
+ "eval_cosine_ap": 0.6494508725044261,
169
+ "eval_cosine_f1": 0.6666196537498678,
170
+ "eval_cosine_f1_threshold": -0.1697666049003601,
171
+ "eval_cosine_mcc": -0.007272583024577166,
172
+ "eval_cosine_precision": 0.4999735547680753,
173
+ "eval_cosine_recall": 0.9998942246668077,
174
+ "eval_loss": 30.446081161499023,
175
+ "eval_runtime": 24.005,
176
+ "eval_samples_per_second": 787.669,
177
+ "eval_steps_per_second": 1.541,
178
+ "step": 14000
179
+ },
180
+ {
181
+ "epoch": 0.7840062720501764,
182
+ "grad_norm": 0.0,
183
+ "learning_rate": 4.232043842682141e-06,
184
+ "loss": 0.0,
185
+ "step": 16000
186
+ },
187
+ {
188
+ "epoch": 0.7840062720501764,
189
+ "eval_cosine_accuracy": 0.5934525068753966,
190
+ "eval_cosine_accuracy_threshold": 0.9391290545463562,
191
+ "eval_cosine_ap": 0.6498809737033857,
192
+ "eval_cosine_f1": 0.6666196537498678,
193
+ "eval_cosine_f1_threshold": -0.17070035636425018,
194
+ "eval_cosine_mcc": -0.007272583024577166,
195
+ "eval_cosine_precision": 0.4999735547680753,
196
+ "eval_cosine_recall": 0.9998942246668077,
197
+ "eval_loss": 30.447351455688477,
198
+ "eval_runtime": 23.3368,
199
+ "eval_samples_per_second": 810.221,
200
+ "eval_steps_per_second": 1.585,
201
+ "step": 16000
202
+ },
203
+ {
204
+ "epoch": 0.8820070560564485,
205
+ "grad_norm": 0.0,
206
+ "learning_rate": 4.103094777562863e-06,
207
+ "loss": 0.0,
208
+ "step": 18000
209
+ },
210
+ {
211
+ "epoch": 0.8820070560564485,
212
+ "eval_cosine_accuracy": 0.593558282208589,
213
+ "eval_cosine_accuracy_threshold": 0.9389414191246033,
214
+ "eval_cosine_ap": 0.6502482026720758,
215
+ "eval_cosine_f1": 0.6666196537498678,
216
+ "eval_cosine_f1_threshold": -0.17154842615127563,
217
+ "eval_cosine_mcc": -0.007272583024577166,
218
+ "eval_cosine_precision": 0.4999735547680753,
219
+ "eval_cosine_recall": 0.9998942246668077,
220
+ "eval_loss": 30.453508377075195,
221
+ "eval_runtime": 23.676,
222
+ "eval_samples_per_second": 798.616,
223
+ "eval_steps_per_second": 1.563,
224
+ "step": 18000
225
+ },
226
+ {
227
+ "epoch": 0.9800078400627205,
228
+ "grad_norm": 0.0,
229
+ "learning_rate": 3.974145712443585e-06,
230
+ "loss": 0.0,
231
+ "step": 20000
232
+ },
233
+ {
234
+ "epoch": 0.9800078400627205,
235
+ "eval_cosine_accuracy": 0.5937169452083774,
236
+ "eval_cosine_accuracy_threshold": 0.9388357400894165,
237
+ "eval_cosine_ap": 0.6506732897432272,
238
+ "eval_cosine_f1": 0.6666196537498678,
239
+ "eval_cosine_f1_threshold": -0.1724291741847992,
240
+ "eval_cosine_mcc": -0.007272583024577166,
241
+ "eval_cosine_precision": 0.4999735547680753,
242
+ "eval_cosine_recall": 0.9998942246668077,
243
+ "eval_loss": 30.439617156982422,
244
+ "eval_runtime": 23.0663,
245
+ "eval_samples_per_second": 819.725,
246
+ "eval_steps_per_second": 1.604,
247
+ "step": 20000
248
+ },
249
+ {
250
+ "epoch": 1.0780086240689926,
251
+ "grad_norm": 0.0,
252
+ "learning_rate": 3.845196647324307e-06,
253
+ "loss": 0.0,
254
+ "step": 22000
255
+ },
256
+ {
257
+ "epoch": 1.0780086240689926,
258
+ "eval_cosine_accuracy": 0.5937698328749735,
259
+ "eval_cosine_accuracy_threshold": 0.9385948181152344,
260
+ "eval_cosine_ap": 0.6509728340267652,
261
+ "eval_cosine_f1": 0.6666196537498678,
262
+ "eval_cosine_f1_threshold": -0.17285144329071045,
263
+ "eval_cosine_mcc": -0.007272583024577166,
264
+ "eval_cosine_precision": 0.4999735547680753,
265
+ "eval_cosine_recall": 0.9998942246668077,
266
+ "eval_loss": 30.44361686706543,
267
+ "eval_runtime": 25.4596,
268
+ "eval_samples_per_second": 742.666,
269
+ "eval_steps_per_second": 1.453,
270
+ "step": 22000
271
+ },
272
+ {
273
+ "epoch": 1.1760094080752646,
274
+ "grad_norm": 0.0,
275
+ "learning_rate": 3.716247582205029e-06,
276
+ "loss": 0.0,
277
+ "step": 24000
278
+ },
279
+ {
280
+ "epoch": 1.1760094080752646,
281
+ "eval_cosine_accuracy": 0.593928495874762,
282
+ "eval_cosine_accuracy_threshold": 0.9375462532043457,
283
+ "eval_cosine_ap": 0.6513894091627628,
284
+ "eval_cosine_f1": 0.6666196537498678,
285
+ "eval_cosine_f1_threshold": -0.17358532547950745,
286
+ "eval_cosine_mcc": -0.007272583024577166,
287
+ "eval_cosine_precision": 0.4999735547680753,
288
+ "eval_cosine_recall": 0.9998942246668077,
289
+ "eval_loss": 30.4697208404541,
290
+ "eval_runtime": 23.202,
291
+ "eval_samples_per_second": 814.93,
292
+ "eval_steps_per_second": 1.595,
293
+ "step": 24000
294
+ },
295
+ {
296
+ "epoch": 1.2740101920815365,
297
+ "grad_norm": 0.0,
298
+ "learning_rate": 3.587362991618311e-06,
299
+ "loss": 0.0,
300
+ "step": 26000
301
+ },
302
+ {
303
+ "epoch": 1.2740101920815365,
304
+ "eval_cosine_accuracy": 0.5941400465411466,
305
+ "eval_cosine_accuracy_threshold": 0.9373874664306641,
306
+ "eval_cosine_ap": 0.6517436899727478,
307
+ "eval_cosine_f1": 0.6666196537498678,
308
+ "eval_cosine_f1_threshold": -0.17465540766716003,
309
+ "eval_cosine_mcc": -0.007272583024577166,
310
+ "eval_cosine_precision": 0.4999735547680753,
311
+ "eval_cosine_recall": 0.9998942246668077,
312
+ "eval_loss": 30.457592010498047,
313
+ "eval_runtime": 23.1905,
314
+ "eval_samples_per_second": 815.335,
315
+ "eval_steps_per_second": 1.595,
316
+ "step": 26000
317
+ },
318
+ {
319
+ "epoch": 1.3720109760878088,
320
+ "grad_norm": 0.0,
321
+ "learning_rate": 3.458607350096712e-06,
322
+ "loss": 0.0,
323
+ "step": 28000
324
+ },
325
+ {
326
+ "epoch": 1.3720109760878088,
327
+ "eval_cosine_accuracy": 0.6150306748466258,
328
+ "eval_cosine_accuracy_threshold": 0.9150234460830688,
329
+ "eval_cosine_ap": 0.6887500197266508,
330
+ "eval_cosine_f1": 0.6666196537498678,
331
+ "eval_cosine_f1_threshold": 0.005598240531980991,
332
+ "eval_cosine_mcc": -0.007272583024577166,
333
+ "eval_cosine_precision": 0.4999735547680753,
334
+ "eval_cosine_recall": 0.9998942246668077,
335
+ "eval_loss": 27.762596130371094,
336
+ "eval_runtime": 24.3242,
337
+ "eval_samples_per_second": 777.334,
338
+ "eval_steps_per_second": 1.521,
339
+ "step": 28000
340
+ },
341
+ {
342
+ "epoch": 1.4700117600940807,
343
+ "grad_norm": 0.0,
344
+ "learning_rate": 3.329658284977434e-06,
345
+ "loss": 0.0,
346
+ "step": 30000
347
+ },
348
+ {
349
+ "epoch": 1.4700117600940807,
350
+ "eval_cosine_accuracy": 0.5982123968690501,
351
+ "eval_cosine_accuracy_threshold": 0.9481082558631897,
352
+ "eval_cosine_ap": 0.6620788779311684,
353
+ "eval_cosine_f1": 0.6666431593794077,
354
+ "eval_cosine_f1_threshold": -0.026352988556027412,
355
+ "eval_cosine_mcc": 0.0,
356
+ "eval_cosine_precision": 0.5,
357
+ "eval_cosine_recall": 0.9998942246668077,
358
+ "eval_loss": 25.89101791381836,
359
+ "eval_runtime": 23.4116,
360
+ "eval_samples_per_second": 807.635,
361
+ "eval_steps_per_second": 1.58,
362
+ "step": 30000
363
+ },
364
+ {
365
+ "epoch": 1.5680125441003527,
366
+ "grad_norm": 0.0,
367
+ "learning_rate": 3.2007092198581564e-06,
368
+ "loss": 0.0,
369
+ "step": 32000
370
+ },
371
+ {
372
+ "epoch": 1.5680125441003527,
373
+ "eval_cosine_accuracy": 0.5983181722022425,
374
+ "eval_cosine_accuracy_threshold": 0.947978138923645,
375
+ "eval_cosine_ap": 0.6623524771391387,
376
+ "eval_cosine_f1": 0.6666431593794077,
377
+ "eval_cosine_f1_threshold": -0.02726462110877037,
378
+ "eval_cosine_mcc": 0.0,
379
+ "eval_cosine_precision": 0.5,
380
+ "eval_cosine_recall": 0.9998942246668077,
381
+ "eval_loss": 25.8829402923584,
382
+ "eval_runtime": 23.5325,
383
+ "eval_samples_per_second": 803.484,
384
+ "eval_steps_per_second": 1.572,
385
+ "step": 32000
386
+ },
387
+ {
388
+ "epoch": 1.666013328106625,
389
+ "grad_norm": 0.0,
390
+ "learning_rate": 3.0717601547388783e-06,
391
+ "loss": 0.0,
392
+ "step": 34000
393
+ },
394
+ {
395
+ "epoch": 1.666013328106625,
396
+ "eval_cosine_accuracy": 0.5985826105352232,
397
+ "eval_cosine_accuracy_threshold": 0.9477732181549072,
398
+ "eval_cosine_ap": 0.6626794204198572,
399
+ "eval_cosine_f1": 0.6666431593794077,
400
+ "eval_cosine_f1_threshold": -0.027758542448282242,
401
+ "eval_cosine_mcc": 0.0,
402
+ "eval_cosine_precision": 0.5,
403
+ "eval_cosine_recall": 0.9998942246668077,
404
+ "eval_loss": 25.918596267700195,
405
+ "eval_runtime": 23.9566,
406
+ "eval_samples_per_second": 789.259,
407
+ "eval_steps_per_second": 1.544,
408
+ "step": 34000
409
+ },
410
+ {
411
+ "epoch": 1.764014112112897,
412
+ "grad_norm": 0.0,
413
+ "learning_rate": 2.9428110896196007e-06,
414
+ "loss": 0.0,
415
+ "step": 36000
416
+ },
417
+ {
418
+ "epoch": 1.764014112112897,
419
+ "eval_cosine_accuracy": 0.598847048868204,
420
+ "eval_cosine_accuracy_threshold": 0.9478007555007935,
421
+ "eval_cosine_ap": 0.6629160839294745,
422
+ "eval_cosine_f1": 0.6666431593794077,
423
+ "eval_cosine_f1_threshold": -0.028077710419893265,
424
+ "eval_cosine_mcc": 0.0,
425
+ "eval_cosine_precision": 0.5,
426
+ "eval_cosine_recall": 0.9998942246668077,
427
+ "eval_loss": 25.906251907348633,
428
+ "eval_runtime": 22.9116,
429
+ "eval_samples_per_second": 825.26,
430
+ "eval_steps_per_second": 1.615,
431
+ "step": 36000
432
+ },
433
+ {
434
+ "epoch": 1.862014896119169,
435
+ "grad_norm": 0.00737870205193758,
436
+ "learning_rate": 2.8147646679561574e-06,
437
+ "loss": 1.5404,
438
+ "step": 38000
439
+ },
440
+ {
441
+ "epoch": 1.862014896119169,
442
+ "eval_cosine_accuracy": 0.6216416331711445,
443
+ "eval_cosine_accuracy_threshold": 0.9999633431434631,
444
+ "eval_cosine_ap": 0.6896995703091671,
445
+ "eval_cosine_f1": 0.6666196537498678,
446
+ "eval_cosine_f1_threshold": 0.9949546456336975,
447
+ "eval_cosine_mcc": -0.007272583024577166,
448
+ "eval_cosine_precision": 0.4999735547680753,
449
+ "eval_cosine_recall": 0.9998942246668077,
450
+ "eval_loss": 28.107051849365234,
451
+ "eval_runtime": 23.2921,
452
+ "eval_samples_per_second": 811.777,
453
+ "eval_steps_per_second": 1.589,
454
+ "step": 38000
455
+ },
456
+ {
457
+ "epoch": 1.9600156801254411,
458
+ "grad_norm": 0.004108751192688942,
459
+ "learning_rate": 2.68581560283688e-06,
460
+ "loss": 0.0017,
461
+ "step": 40000
462
+ },
463
+ {
464
+ "epoch": 1.9600156801254411,
465
+ "eval_cosine_accuracy": 0.623598476835202,
466
+ "eval_cosine_accuracy_threshold": 0.9999710917472839,
467
+ "eval_cosine_ap": 0.696279151150395,
468
+ "eval_cosine_f1": 0.6666196537498678,
469
+ "eval_cosine_f1_threshold": 0.9948583245277405,
470
+ "eval_cosine_mcc": -0.007272583024577166,
471
+ "eval_cosine_precision": 0.4999735547680753,
472
+ "eval_cosine_recall": 0.9998942246668077,
473
+ "eval_loss": 27.634450912475586,
474
+ "eval_runtime": 23.2848,
475
+ "eval_samples_per_second": 812.033,
476
+ "eval_steps_per_second": 1.589,
477
+ "step": 40000
478
+ },
479
+ {
480
+ "epoch": 2.058016464131713,
481
+ "grad_norm": 0.002198939910158515,
482
+ "learning_rate": 2.5568665377176017e-06,
483
+ "loss": 0.0007,
484
+ "step": 42000
485
+ },
486
+ {
487
+ "epoch": 2.058016464131713,
488
+ "eval_cosine_accuracy": 0.6238100275015867,
489
+ "eval_cosine_accuracy_threshold": 0.9999785423278809,
490
+ "eval_cosine_ap": 0.7001012970861294,
491
+ "eval_cosine_f1": 0.6666196537498678,
492
+ "eval_cosine_f1_threshold": 0.9943636655807495,
493
+ "eval_cosine_mcc": -0.007272583024577166,
494
+ "eval_cosine_precision": 0.4999735547680753,
495
+ "eval_cosine_recall": 0.9998942246668077,
496
+ "eval_loss": 26.901527404785156,
497
+ "eval_runtime": 24.7872,
498
+ "eval_samples_per_second": 762.814,
499
+ "eval_steps_per_second": 1.493,
500
+ "step": 42000
501
+ },
502
+ {
503
+ "epoch": 2.156017248137985,
504
+ "grad_norm": 0.0011129860067740083,
505
+ "learning_rate": 2.427917472598324e-06,
506
+ "loss": 0.0003,
507
+ "step": 44000
508
+ },
509
+ {
510
+ "epoch": 2.156017248137985,
511
+ "eval_cosine_accuracy": 0.6339644594880474,
512
+ "eval_cosine_accuracy_threshold": 0.9999836683273315,
513
+ "eval_cosine_ap": 0.7180590205623526,
514
+ "eval_cosine_f1": 0.6666196537498678,
515
+ "eval_cosine_f1_threshold": 0.9934605360031128,
516
+ "eval_cosine_mcc": -0.007272583024577166,
517
+ "eval_cosine_precision": 0.4999735547680753,
518
+ "eval_cosine_recall": 0.9998942246668077,
519
+ "eval_loss": 26.755794525146484,
520
+ "eval_runtime": 23.8512,
521
+ "eval_samples_per_second": 792.75,
522
+ "eval_steps_per_second": 1.551,
523
+ "step": 44000
524
+ },
525
+ {
526
+ "epoch": 2.2540180321442573,
527
+ "grad_norm": 0.0,
528
+ "learning_rate": 2.298968407479046e-06,
529
+ "loss": 0.9002,
530
+ "step": 46000
531
+ },
532
+ {
533
+ "epoch": 2.2540180321442573,
534
+ "eval_cosine_accuracy": 0.5470171356039771,
535
+ "eval_cosine_accuracy_threshold": 0.9858599305152893,
536
+ "eval_cosine_ap": 0.5604867418651215,
537
+ "eval_cosine_f1": 0.6666196537498678,
538
+ "eval_cosine_f1_threshold": -0.4505715072154999,
539
+ "eval_cosine_mcc": -0.007272583024577166,
540
+ "eval_cosine_precision": 0.4999735547680753,
541
+ "eval_cosine_recall": 0.9998942246668077,
542
+ "eval_loss": 41.99852752685547,
543
+ "eval_runtime": 23.9606,
544
+ "eval_samples_per_second": 789.128,
545
+ "eval_steps_per_second": 1.544,
546
+ "step": 46000
547
+ },
548
+ {
549
+ "epoch": 2.352018816150529,
550
+ "grad_norm": 0.0,
551
+ "learning_rate": 2.1700193423597684e-06,
552
+ "loss": 0.0,
553
+ "step": 48000
554
+ },
555
+ {
556
+ "epoch": 2.352018816150529,
557
+ "eval_cosine_accuracy": 0.5442140892743812,
558
+ "eval_cosine_accuracy_threshold": 0.9832857847213745,
559
+ "eval_cosine_ap": 0.55157056706159,
560
+ "eval_cosine_f1": 0.6666901731250661,
561
+ "eval_cosine_f1_threshold": -0.2545183300971985,
562
+ "eval_cosine_mcc": 0.007272583024577166,
563
+ "eval_cosine_precision": 0.5000264452319246,
564
+ "eval_cosine_recall": 1.0,
565
+ "eval_loss": 44.62057876586914,
566
+ "eval_runtime": 24.1334,
567
+ "eval_samples_per_second": 783.478,
568
+ "eval_steps_per_second": 1.533,
569
+ "step": 48000
570
+ },
571
+ {
572
+ "epoch": 2.4500196001568013,
573
+ "grad_norm": 0.0,
574
+ "learning_rate": 2.0410702772404903e-06,
575
+ "loss": 0.0,
576
+ "step": 50000
577
+ },
578
+ {
579
+ "epoch": 2.4500196001568013,
580
+ "eval_cosine_accuracy": 0.5505606092659192,
581
+ "eval_cosine_accuracy_threshold": 0.9857516288757324,
582
+ "eval_cosine_ap": 0.5683410397390614,
583
+ "eval_cosine_f1": 0.6666196537498678,
584
+ "eval_cosine_f1_threshold": -0.29352766275405884,
585
+ "eval_cosine_mcc": -0.007272583024577166,
586
+ "eval_cosine_precision": 0.4999735547680753,
587
+ "eval_cosine_recall": 0.9998942246668077,
588
+ "eval_loss": 40.676856994628906,
589
+ "eval_runtime": 25.7266,
590
+ "eval_samples_per_second": 734.96,
591
+ "eval_steps_per_second": 1.438,
592
+ "step": 50000
593
+ },
594
+ {
595
+ "epoch": 2.548020384163073,
596
+ "grad_norm": 0.0,
597
+ "learning_rate": 1.9121212121212123e-06,
598
+ "loss": 0.0,
599
+ "step": 52000
600
+ },
601
+ {
602
+ "epoch": 2.548020384163073,
603
+ "eval_cosine_accuracy": 0.5527290035963613,
604
+ "eval_cosine_accuracy_threshold": 0.9838961362838745,
605
+ "eval_cosine_ap": 0.5783079089292276,
606
+ "eval_cosine_f1": 0.6666901731250661,
607
+ "eval_cosine_f1_threshold": -0.3105597496032715,
608
+ "eval_cosine_mcc": 0.007272583024577166,
609
+ "eval_cosine_precision": 0.5000264452319246,
610
+ "eval_cosine_recall": 1.0,
611
+ "eval_loss": 49.68011474609375,
612
+ "eval_runtime": 24.0384,
613
+ "eval_samples_per_second": 786.575,
614
+ "eval_steps_per_second": 1.539,
615
+ "step": 52000
616
+ },
617
+ {
618
+ "epoch": 2.6460211681693453,
619
+ "grad_norm": 0.0,
620
+ "learning_rate": 1.7831721470019344e-06,
621
+ "loss": 0.0,
622
+ "step": 54000
623
+ },
624
+ {
625
+ "epoch": 2.6460211681693453,
626
+ "eval_cosine_accuracy": 0.5522530145969959,
627
+ "eval_cosine_accuracy_threshold": 0.9908125400543213,
628
+ "eval_cosine_ap": 0.573218654996148,
629
+ "eval_cosine_f1": 0.6666196537498678,
630
+ "eval_cosine_f1_threshold": -0.12428700923919678,
631
+ "eval_cosine_mcc": -0.007272583024577166,
632
+ "eval_cosine_precision": 0.4999735547680753,
633
+ "eval_cosine_recall": 0.9998942246668077,
634
+ "eval_loss": 33.039730072021484,
635
+ "eval_runtime": 23.0962,
636
+ "eval_samples_per_second": 818.662,
637
+ "eval_steps_per_second": 1.602,
638
+ "step": 54000
639
+ },
640
+ {
641
+ "epoch": 2.7440219521756175,
642
+ "grad_norm": 0.0,
643
+ "learning_rate": 1.6542230818826566e-06,
644
+ "loss": 0.0,
645
+ "step": 56000
646
+ },
647
+ {
648
+ "epoch": 2.7440219521756175,
649
+ "eval_cosine_accuracy": 0.5532049925957266,
650
+ "eval_cosine_accuracy_threshold": 0.9827109575271606,
651
+ "eval_cosine_ap": 0.5732697527690604,
652
+ "eval_cosine_f1": 0.6666901731250661,
653
+ "eval_cosine_f1_threshold": -0.32465463876724243,
654
+ "eval_cosine_mcc": 0.007272583024577166,
655
+ "eval_cosine_precision": 0.5000264452319246,
656
+ "eval_cosine_recall": 1.0,
657
+ "eval_loss": 48.252845764160156,
658
+ "eval_runtime": 24.2479,
659
+ "eval_samples_per_second": 779.779,
660
+ "eval_steps_per_second": 1.526,
661
+ "step": 56000
662
+ },
663
+ {
664
+ "epoch": 2.8420227361818897,
665
+ "grad_norm": 0.0,
666
+ "learning_rate": 1.5252740167633787e-06,
667
+ "loss": 0.0,
668
+ "step": 58000
669
+ },
670
+ {
671
+ "epoch": 2.8420227361818897,
672
+ "eval_cosine_accuracy": 0.5468584726041887,
673
+ "eval_cosine_accuracy_threshold": 0.9873851537704468,
674
+ "eval_cosine_ap": 0.5562632644839256,
675
+ "eval_cosine_f1": 0.6666431593794077,
676
+ "eval_cosine_f1_threshold": -0.1655093878507614,
677
+ "eval_cosine_mcc": 0.0,
678
+ "eval_cosine_precision": 0.5,
679
+ "eval_cosine_recall": 0.9998942246668077,
680
+ "eval_loss": 39.66588592529297,
681
+ "eval_runtime": 23.254,
682
+ "eval_samples_per_second": 813.108,
683
+ "eval_steps_per_second": 1.591,
684
+ "step": 58000
685
+ },
686
+ {
687
+ "epoch": 2.9400235201881615,
688
+ "grad_norm": 0.0,
689
+ "learning_rate": 1.3963249516441009e-06,
690
+ "loss": 0.0,
691
+ "step": 60000
692
+ },
693
+ {
694
+ "epoch": 2.9400235201881615,
695
+ "eval_cosine_accuracy": 0.5468584726041887,
696
+ "eval_cosine_accuracy_threshold": 0.9873851537704468,
697
+ "eval_cosine_ap": 0.5562632644839256,
698
+ "eval_cosine_f1": 0.6666431593794077,
699
+ "eval_cosine_f1_threshold": -0.1655093878507614,
700
+ "eval_cosine_mcc": 0.0,
701
+ "eval_cosine_precision": 0.5,
702
+ "eval_cosine_recall": 0.9998942246668077,
703
+ "eval_loss": 39.66588592529297,
704
+ "eval_runtime": 23.1741,
705
+ "eval_samples_per_second": 815.91,
706
+ "eval_steps_per_second": 1.597,
707
+ "step": 60000
708
+ }
709
+ ],
710
+ "logging_steps": 2000,
711
+ "max_steps": 81632,
712
+ "num_input_tokens_seen": 0,
713
+ "num_train_epochs": 4,
714
+ "save_steps": 500,
715
+ "stateful_callbacks": {
716
+ "TrainerControl": {
717
+ "args": {
718
+ "should_epoch_stop": false,
719
+ "should_evaluate": false,
720
+ "should_log": false,
721
+ "should_save": true,
722
+ "should_training_stop": false
723
+ },
724
+ "attributes": {}
725
+ }
726
+ },
727
+ "total_flos": 0.0,
728
+ "train_batch_size": 512,
729
+ "trial_name": null,
730
+ "trial_params": null
731
+ }
checkpoint-61224/training_metrics.json ADDED
@@ -0,0 +1,396 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metrics_history": [
3
+ {
4
+ "epoch": 0.09800078400627205,
5
+ "global_step": 2000,
6
+ "eval_loss": 26.897747039794922,
7
+ "eval_cosine_accuracy": 0.6100063465199915,
8
+ "eval_cosine_accuracy_threshold": 0.9997795820236206,
9
+ "eval_cosine_f1": 0.6667136862150438,
10
+ "eval_cosine_f1_threshold": 0.991624653339386,
11
+ "eval_cosine_precision": 0.5000793524837327,
12
+ "eval_cosine_recall": 0.9998942246668077,
13
+ "eval_cosine_ap": 0.6786677205526788,
14
+ "eval_cosine_mcc": 0.00975822629781094
15
+ },
16
+ {
17
+ "epoch": 0.1960015680125441,
18
+ "global_step": 4000,
19
+ "eval_loss": 27.12621307373047,
20
+ "eval_cosine_accuracy": 0.6240215781679712,
21
+ "eval_cosine_accuracy_threshold": 0.9999226331710815,
22
+ "eval_cosine_f1": 0.6666196537498678,
23
+ "eval_cosine_f1_threshold": 0.9939442873001099,
24
+ "eval_cosine_precision": 0.4999735547680753,
25
+ "eval_cosine_recall": 0.9998942246668077,
26
+ "eval_cosine_ap": 0.6923415808404929,
27
+ "eval_cosine_mcc": -0.007272583024577166
28
+ },
29
+ {
30
+ "epoch": 0.29400235201881614,
31
+ "global_step": 6000,
32
+ "eval_loss": 32.61564254760742,
33
+ "eval_cosine_accuracy": 0.5835096255553205,
34
+ "eval_cosine_accuracy_threshold": 0.9478387236595154,
35
+ "eval_cosine_f1": 0.6666901731250661,
36
+ "eval_cosine_f1_threshold": -0.09354762732982635,
37
+ "eval_cosine_precision": 0.5000264452319246,
38
+ "eval_cosine_recall": 1.0,
39
+ "eval_cosine_ap": 0.6313955266297183,
40
+ "eval_cosine_mcc": 0.007272583024577166
41
+ },
42
+ {
43
+ "epoch": 0.3920031360250882,
44
+ "global_step": 8000,
45
+ "eval_loss": 33.463409423828125,
46
+ "eval_cosine_accuracy": 0.5764226782314364,
47
+ "eval_cosine_accuracy_threshold": 0.953868567943573,
48
+ "eval_cosine_f1": 0.6666431593794077,
49
+ "eval_cosine_f1_threshold": -0.09449770301580429,
50
+ "eval_cosine_precision": 0.5,
51
+ "eval_cosine_recall": 0.9998942246668077,
52
+ "eval_cosine_ap": 0.6184434211752825,
53
+ "eval_cosine_mcc": 0.0
54
+ },
55
+ {
56
+ "epoch": 0.49000392003136023,
57
+ "global_step": 10000,
58
+ "eval_loss": 35.26945495605469,
59
+ "eval_cosine_accuracy": 0.5845673788872435,
60
+ "eval_cosine_accuracy_threshold": 0.9387475848197937,
61
+ "eval_cosine_f1": 0.6666196537498678,
62
+ "eval_cosine_f1_threshold": -0.1685822755098343,
63
+ "eval_cosine_precision": 0.4999735547680753,
64
+ "eval_cosine_recall": 0.9998942246668077,
65
+ "eval_cosine_ap": 0.6273588632357191,
66
+ "eval_cosine_mcc": -0.007272583024577166
67
+ },
68
+ {
69
+ "epoch": 0.5880047040376323,
70
+ "global_step": 12000,
71
+ "eval_loss": 30.115076065063477,
72
+ "eval_cosine_accuracy": 0.5943515972075312,
73
+ "eval_cosine_accuracy_threshold": 0.9397846460342407,
74
+ "eval_cosine_f1": 0.6666196537498678,
75
+ "eval_cosine_f1_threshold": -0.17127826809883118,
76
+ "eval_cosine_precision": 0.4999735547680753,
77
+ "eval_cosine_recall": 0.9998942246668077,
78
+ "eval_cosine_ap": 0.6521863345575756,
79
+ "eval_cosine_mcc": -0.007272583024577166
80
+ },
81
+ {
82
+ "epoch": 0.6860054880439044,
83
+ "global_step": 14000,
84
+ "eval_loss": 30.446081161499023,
85
+ "eval_cosine_accuracy": 0.5931351808758197,
86
+ "eval_cosine_accuracy_threshold": 0.9391266107559204,
87
+ "eval_cosine_f1": 0.6666196537498678,
88
+ "eval_cosine_f1_threshold": -0.1697666049003601,
89
+ "eval_cosine_precision": 0.4999735547680753,
90
+ "eval_cosine_recall": 0.9998942246668077,
91
+ "eval_cosine_ap": 0.6494508725044261,
92
+ "eval_cosine_mcc": -0.007272583024577166
93
+ },
94
+ {
95
+ "epoch": 0.7840062720501764,
96
+ "global_step": 16000,
97
+ "eval_loss": 30.447351455688477,
98
+ "eval_cosine_accuracy": 0.5934525068753966,
99
+ "eval_cosine_accuracy_threshold": 0.9391290545463562,
100
+ "eval_cosine_f1": 0.6666196537498678,
101
+ "eval_cosine_f1_threshold": -0.17070035636425018,
102
+ "eval_cosine_precision": 0.4999735547680753,
103
+ "eval_cosine_recall": 0.9998942246668077,
104
+ "eval_cosine_ap": 0.6498809737033857,
105
+ "eval_cosine_mcc": -0.007272583024577166
106
+ },
107
+ {
108
+ "epoch": 0.8820070560564485,
109
+ "global_step": 18000,
110
+ "eval_loss": 30.453508377075195,
111
+ "eval_cosine_accuracy": 0.593558282208589,
112
+ "eval_cosine_accuracy_threshold": 0.9389414191246033,
113
+ "eval_cosine_f1": 0.6666196537498678,
114
+ "eval_cosine_f1_threshold": -0.17154842615127563,
115
+ "eval_cosine_precision": 0.4999735547680753,
116
+ "eval_cosine_recall": 0.9998942246668077,
117
+ "eval_cosine_ap": 0.6502482026720758,
118
+ "eval_cosine_mcc": -0.007272583024577166
119
+ },
120
+ {
121
+ "epoch": 0.9800078400627205,
122
+ "global_step": 20000,
123
+ "eval_loss": 30.439617156982422,
124
+ "eval_cosine_accuracy": 0.5937169452083774,
125
+ "eval_cosine_accuracy_threshold": 0.9388357400894165,
126
+ "eval_cosine_f1": 0.6666196537498678,
127
+ "eval_cosine_f1_threshold": -0.1724291741847992,
128
+ "eval_cosine_precision": 0.4999735547680753,
129
+ "eval_cosine_recall": 0.9998942246668077,
130
+ "eval_cosine_ap": 0.6506732897432272,
131
+ "eval_cosine_mcc": -0.007272583024577166
132
+ },
133
+ {
134
+ "epoch": 1.0780086240689926,
135
+ "global_step": 22000,
136
+ "eval_loss": 30.44361686706543,
137
+ "eval_cosine_accuracy": 0.5937698328749735,
138
+ "eval_cosine_accuracy_threshold": 0.9385948181152344,
139
+ "eval_cosine_f1": 0.6666196537498678,
140
+ "eval_cosine_f1_threshold": -0.17285144329071045,
141
+ "eval_cosine_precision": 0.4999735547680753,
142
+ "eval_cosine_recall": 0.9998942246668077,
143
+ "eval_cosine_ap": 0.6509728340267652,
144
+ "eval_cosine_mcc": -0.007272583024577166
145
+ },
146
+ {
147
+ "epoch": 1.1760094080752646,
148
+ "global_step": 24000,
149
+ "eval_loss": 30.4697208404541,
150
+ "eval_cosine_accuracy": 0.593928495874762,
151
+ "eval_cosine_accuracy_threshold": 0.9375462532043457,
152
+ "eval_cosine_f1": 0.6666196537498678,
153
+ "eval_cosine_f1_threshold": -0.17358532547950745,
154
+ "eval_cosine_precision": 0.4999735547680753,
155
+ "eval_cosine_recall": 0.9998942246668077,
156
+ "eval_cosine_ap": 0.6513894091627628,
157
+ "eval_cosine_mcc": -0.007272583024577166
158
+ },
159
+ {
160
+ "epoch": 1.2740101920815365,
161
+ "global_step": 26000,
162
+ "eval_loss": 30.457592010498047,
163
+ "eval_cosine_accuracy": 0.5941400465411466,
164
+ "eval_cosine_accuracy_threshold": 0.9373874664306641,
165
+ "eval_cosine_f1": 0.6666196537498678,
166
+ "eval_cosine_f1_threshold": -0.17465540766716003,
167
+ "eval_cosine_precision": 0.4999735547680753,
168
+ "eval_cosine_recall": 0.9998942246668077,
169
+ "eval_cosine_ap": 0.6517436899727478,
170
+ "eval_cosine_mcc": -0.007272583024577166
171
+ },
172
+ {
173
+ "epoch": 1.3720109760878088,
174
+ "global_step": 28000,
175
+ "eval_loss": 27.762596130371094,
176
+ "eval_cosine_accuracy": 0.6150306748466258,
177
+ "eval_cosine_accuracy_threshold": 0.9150234460830688,
178
+ "eval_cosine_f1": 0.6666196537498678,
179
+ "eval_cosine_f1_threshold": 0.005598240531980991,
180
+ "eval_cosine_precision": 0.4999735547680753,
181
+ "eval_cosine_recall": 0.9998942246668077,
182
+ "eval_cosine_ap": 0.6887500197266508,
183
+ "eval_cosine_mcc": -0.007272583024577166
184
+ },
185
+ {
186
+ "epoch": 1.4700117600940807,
187
+ "global_step": 30000,
188
+ "eval_loss": 25.89101791381836,
189
+ "eval_cosine_accuracy": 0.5982123968690501,
190
+ "eval_cosine_accuracy_threshold": 0.9481082558631897,
191
+ "eval_cosine_f1": 0.6666431593794077,
192
+ "eval_cosine_f1_threshold": -0.026352988556027412,
193
+ "eval_cosine_precision": 0.5,
194
+ "eval_cosine_recall": 0.9998942246668077,
195
+ "eval_cosine_ap": 0.6620788779311684,
196
+ "eval_cosine_mcc": 0.0
197
+ },
198
+ {
199
+ "epoch": 1.5680125441003527,
200
+ "global_step": 32000,
201
+ "eval_loss": 25.8829402923584,
202
+ "eval_cosine_accuracy": 0.5983181722022425,
203
+ "eval_cosine_accuracy_threshold": 0.947978138923645,
204
+ "eval_cosine_f1": 0.6666431593794077,
205
+ "eval_cosine_f1_threshold": -0.02726462110877037,
206
+ "eval_cosine_precision": 0.5,
207
+ "eval_cosine_recall": 0.9998942246668077,
208
+ "eval_cosine_ap": 0.6623524771391387,
209
+ "eval_cosine_mcc": 0.0
210
+ },
211
+ {
212
+ "epoch": 1.666013328106625,
213
+ "global_step": 34000,
214
+ "eval_loss": 25.918596267700195,
215
+ "eval_cosine_accuracy": 0.5985826105352232,
216
+ "eval_cosine_accuracy_threshold": 0.9477732181549072,
217
+ "eval_cosine_f1": 0.6666431593794077,
218
+ "eval_cosine_f1_threshold": -0.027758542448282242,
219
+ "eval_cosine_precision": 0.5,
220
+ "eval_cosine_recall": 0.9998942246668077,
221
+ "eval_cosine_ap": 0.6626794204198572,
222
+ "eval_cosine_mcc": 0.0
223
+ },
224
+ {
225
+ "epoch": 1.764014112112897,
226
+ "global_step": 36000,
227
+ "eval_loss": 25.906251907348633,
228
+ "eval_cosine_accuracy": 0.598847048868204,
229
+ "eval_cosine_accuracy_threshold": 0.9478007555007935,
230
+ "eval_cosine_f1": 0.6666431593794077,
231
+ "eval_cosine_f1_threshold": -0.028077710419893265,
232
+ "eval_cosine_precision": 0.5,
233
+ "eval_cosine_recall": 0.9998942246668077,
234
+ "eval_cosine_ap": 0.6629160839294745,
235
+ "eval_cosine_mcc": 0.0
236
+ },
237
+ {
238
+ "epoch": 1.862014896119169,
239
+ "global_step": 38000,
240
+ "eval_loss": 28.107051849365234,
241
+ "eval_cosine_accuracy": 0.6216416331711445,
242
+ "eval_cosine_accuracy_threshold": 0.9999633431434631,
243
+ "eval_cosine_f1": 0.6666196537498678,
244
+ "eval_cosine_f1_threshold": 0.9949546456336975,
245
+ "eval_cosine_precision": 0.4999735547680753,
246
+ "eval_cosine_recall": 0.9998942246668077,
247
+ "eval_cosine_ap": 0.6896995703091671,
248
+ "eval_cosine_mcc": -0.007272583024577166
249
+ },
250
+ {
251
+ "epoch": 1.9600156801254411,
252
+ "global_step": 40000,
253
+ "eval_loss": 27.634450912475586,
254
+ "eval_cosine_accuracy": 0.623598476835202,
255
+ "eval_cosine_accuracy_threshold": 0.9999710917472839,
256
+ "eval_cosine_f1": 0.6666196537498678,
257
+ "eval_cosine_f1_threshold": 0.9948583245277405,
258
+ "eval_cosine_precision": 0.4999735547680753,
259
+ "eval_cosine_recall": 0.9998942246668077,
260
+ "eval_cosine_ap": 0.696279151150395,
261
+ "eval_cosine_mcc": -0.007272583024577166
262
+ },
263
+ {
264
+ "epoch": 2.058016464131713,
265
+ "global_step": 42000,
266
+ "eval_loss": 26.901527404785156,
267
+ "eval_cosine_accuracy": 0.6238100275015867,
268
+ "eval_cosine_accuracy_threshold": 0.9999785423278809,
269
+ "eval_cosine_f1": 0.6666196537498678,
270
+ "eval_cosine_f1_threshold": 0.9943636655807495,
271
+ "eval_cosine_precision": 0.4999735547680753,
272
+ "eval_cosine_recall": 0.9998942246668077,
273
+ "eval_cosine_ap": 0.7001012970861294,
274
+ "eval_cosine_mcc": -0.007272583024577166
275
+ },
276
+ {
277
+ "epoch": 2.156017248137985,
278
+ "global_step": 44000,
279
+ "eval_loss": 26.755794525146484,
280
+ "eval_cosine_accuracy": 0.6339644594880474,
281
+ "eval_cosine_accuracy_threshold": 0.9999836683273315,
282
+ "eval_cosine_f1": 0.6666196537498678,
283
+ "eval_cosine_f1_threshold": 0.9934605360031128,
284
+ "eval_cosine_precision": 0.4999735547680753,
285
+ "eval_cosine_recall": 0.9998942246668077,
286
+ "eval_cosine_ap": 0.7180590205623526,
287
+ "eval_cosine_mcc": -0.007272583024577166
288
+ },
289
+ {
290
+ "epoch": 2.2540180321442573,
291
+ "global_step": 46000,
292
+ "eval_loss": 41.99852752685547,
293
+ "eval_cosine_accuracy": 0.5470171356039771,
294
+ "eval_cosine_accuracy_threshold": 0.9858599305152893,
295
+ "eval_cosine_f1": 0.6666196537498678,
296
+ "eval_cosine_f1_threshold": -0.4505715072154999,
297
+ "eval_cosine_precision": 0.4999735547680753,
298
+ "eval_cosine_recall": 0.9998942246668077,
299
+ "eval_cosine_ap": 0.5604867418651215,
300
+ "eval_cosine_mcc": -0.007272583024577166
301
+ },
302
+ {
303
+ "epoch": 2.352018816150529,
304
+ "global_step": 48000,
305
+ "eval_loss": 44.62057876586914,
306
+ "eval_cosine_accuracy": 0.5442140892743812,
307
+ "eval_cosine_accuracy_threshold": 0.9832857847213745,
308
+ "eval_cosine_f1": 0.6666901731250661,
309
+ "eval_cosine_f1_threshold": -0.2545183300971985,
310
+ "eval_cosine_precision": 0.5000264452319246,
311
+ "eval_cosine_recall": 1.0,
312
+ "eval_cosine_ap": 0.55157056706159,
313
+ "eval_cosine_mcc": 0.007272583024577166
314
+ },
315
+ {
316
+ "epoch": 2.4500196001568013,
317
+ "global_step": 50000,
318
+ "eval_loss": 40.676856994628906,
319
+ "eval_cosine_accuracy": 0.5505606092659192,
320
+ "eval_cosine_accuracy_threshold": 0.9857516288757324,
321
+ "eval_cosine_f1": 0.6666196537498678,
322
+ "eval_cosine_f1_threshold": -0.29352766275405884,
323
+ "eval_cosine_precision": 0.4999735547680753,
324
+ "eval_cosine_recall": 0.9998942246668077,
325
+ "eval_cosine_ap": 0.5683410397390614,
326
+ "eval_cosine_mcc": -0.007272583024577166
327
+ },
328
+ {
329
+ "epoch": 2.548020384163073,
330
+ "global_step": 52000,
331
+ "eval_loss": 49.68011474609375,
332
+ "eval_cosine_accuracy": 0.5527290035963613,
333
+ "eval_cosine_accuracy_threshold": 0.9838961362838745,
334
+ "eval_cosine_f1": 0.6666901731250661,
335
+ "eval_cosine_f1_threshold": -0.3105597496032715,
336
+ "eval_cosine_precision": 0.5000264452319246,
337
+ "eval_cosine_recall": 1.0,
338
+ "eval_cosine_ap": 0.5783079089292276,
339
+ "eval_cosine_mcc": 0.007272583024577166
340
+ },
341
+ {
342
+ "epoch": 2.6460211681693453,
343
+ "global_step": 54000,
344
+ "eval_loss": 33.039730072021484,
345
+ "eval_cosine_accuracy": 0.5522530145969959,
346
+ "eval_cosine_accuracy_threshold": 0.9908125400543213,
347
+ "eval_cosine_f1": 0.6666196537498678,
348
+ "eval_cosine_f1_threshold": -0.12428700923919678,
349
+ "eval_cosine_precision": 0.4999735547680753,
350
+ "eval_cosine_recall": 0.9998942246668077,
351
+ "eval_cosine_ap": 0.573218654996148,
352
+ "eval_cosine_mcc": -0.007272583024577166
353
+ },
354
+ {
355
+ "epoch": 2.7440219521756175,
356
+ "global_step": 56000,
357
+ "eval_loss": 48.252845764160156,
358
+ "eval_cosine_accuracy": 0.5532049925957266,
359
+ "eval_cosine_accuracy_threshold": 0.9827109575271606,
360
+ "eval_cosine_f1": 0.6666901731250661,
361
+ "eval_cosine_f1_threshold": -0.32465463876724243,
362
+ "eval_cosine_precision": 0.5000264452319246,
363
+ "eval_cosine_recall": 1.0,
364
+ "eval_cosine_ap": 0.5732697527690604,
365
+ "eval_cosine_mcc": 0.007272583024577166
366
+ },
367
+ {
368
+ "epoch": 2.8420227361818897,
369
+ "global_step": 58000,
370
+ "eval_loss": 39.66588592529297,
371
+ "eval_cosine_accuracy": 0.5468584726041887,
372
+ "eval_cosine_accuracy_threshold": 0.9873851537704468,
373
+ "eval_cosine_f1": 0.6666431593794077,
374
+ "eval_cosine_f1_threshold": -0.1655093878507614,
375
+ "eval_cosine_precision": 0.5,
376
+ "eval_cosine_recall": 0.9998942246668077,
377
+ "eval_cosine_ap": 0.5562632644839256,
378
+ "eval_cosine_mcc": 0.0
379
+ },
380
+ {
381
+ "epoch": 2.9400235201881615,
382
+ "global_step": 60000,
383
+ "eval_loss": 39.66588592529297,
384
+ "eval_cosine_accuracy": 0.5468584726041887,
385
+ "eval_cosine_accuracy_threshold": 0.9873851537704468,
386
+ "eval_cosine_f1": 0.6666431593794077,
387
+ "eval_cosine_f1_threshold": -0.1655093878507614,
388
+ "eval_cosine_precision": 0.5,
389
+ "eval_cosine_recall": 0.9998942246668077,
390
+ "eval_cosine_ap": 0.5562632644839256,
391
+ "eval_cosine_mcc": 0.0
392
+ }
393
+ ],
394
+ "current_epoch": 3,
395
+ "global_step": 61224
396
+ }
checkpoint-61224/vocab.txt ADDED
The diff for this file is too large to render. See raw diff