leoner24 commited on
Commit
4e77c55
·
verified ·
1 Parent(s): 6850aa8

Upload 9 files

Browse files
config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "checkpoints/bert-large-uncased",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 1024,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4096,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 16,
17
+ "num_hidden_layers": 24,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "transformers_version": "4.2.1",
21
+ "type_vocab_size": 2,
22
+ "use_cache": true,
23
+ "vocab_size": 30522
24
+ }
eval_results.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch = 3.0
2
+ eval_CR = 90.51
3
+ eval_MPQA = 89.4
4
+ eval_MR = 83.79
5
+ eval_MRPC = 75.74
6
+ eval_SST2 = 89.11
7
+ eval_SUBJ = 94.58
8
+ eval_TREC = 77.48
9
+ eval_avg_sts = 0.8528364235468135
10
+ eval_avg_transfer = 85.80142857142857
11
+ eval_sickr_spearman = 0.8424179684340722
12
+ eval_stsb_spearman = 0.8632548786595547
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78710b3a7c80e4b953c58861611794e645bef5ceca2dcf40ad9a37e3c9626d71
3
+ size 1470047490
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "/data/lyhe/data/bert-large-uncased"}
train_results.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ epoch = 3.0
2
+ train_runtime = 8699.7484
3
+ train_samples_per_second = 0.354
trainer_state.json ADDED
@@ -0,0 +1,919 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8528364235468135,
3
+ "best_model_checkpoint": "checkpoints/bert-large-uncased",
4
+ "epoch": 3.0,
5
+ "global_step": 3081,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.02,
12
+ "eval_avg_sts": 0.7741718103390286,
13
+ "eval_sickr_spearman": 0.745954513736433,
14
+ "eval_stsb_spearman": 0.8023891069416242,
15
+ "step": 25
16
+ },
17
+ {
18
+ "epoch": 0.05,
19
+ "eval_avg_sts": 0.8081911987521978,
20
+ "eval_sickr_spearman": 0.779239730815335,
21
+ "eval_stsb_spearman": 0.8371426666890606,
22
+ "step": 50
23
+ },
24
+ {
25
+ "epoch": 0.07,
26
+ "eval_avg_sts": 0.8136161115454807,
27
+ "eval_sickr_spearman": 0.7835086390489516,
28
+ "eval_stsb_spearman": 0.8437235840420098,
29
+ "step": 75
30
+ },
31
+ {
32
+ "epoch": 0.1,
33
+ "eval_avg_sts": 0.8155047769520929,
34
+ "eval_sickr_spearman": 0.7852900645701897,
35
+ "eval_stsb_spearman": 0.845719489333996,
36
+ "step": 100
37
+ },
38
+ {
39
+ "epoch": 0.12,
40
+ "eval_avg_sts": 0.8238067491381542,
41
+ "eval_sickr_spearman": 0.7985816632400571,
42
+ "eval_stsb_spearman": 0.8490318350362512,
43
+ "step": 125
44
+ },
45
+ {
46
+ "epoch": 0.15,
47
+ "eval_avg_sts": 0.8224993495563874,
48
+ "eval_sickr_spearman": 0.7982152819982874,
49
+ "eval_stsb_spearman": 0.8467834171144873,
50
+ "step": 150
51
+ },
52
+ {
53
+ "epoch": 0.17,
54
+ "eval_avg_sts": 0.8263297389680633,
55
+ "eval_sickr_spearman": 0.8034025929225019,
56
+ "eval_stsb_spearman": 0.8492568850136247,
57
+ "step": 175
58
+ },
59
+ {
60
+ "epoch": 0.19,
61
+ "eval_avg_sts": 0.82711974150715,
62
+ "eval_sickr_spearman": 0.8028132513078563,
63
+ "eval_stsb_spearman": 0.8514262317064436,
64
+ "step": 200
65
+ },
66
+ {
67
+ "epoch": 0.22,
68
+ "eval_avg_sts": 0.8285051091857007,
69
+ "eval_sickr_spearman": 0.8064033359848481,
70
+ "eval_stsb_spearman": 0.8506068823865535,
71
+ "step": 225
72
+ },
73
+ {
74
+ "epoch": 0.24,
75
+ "eval_avg_sts": 0.8308924909456856,
76
+ "eval_sickr_spearman": 0.8108744071244177,
77
+ "eval_stsb_spearman": 0.8509105747669534,
78
+ "step": 250
79
+ },
80
+ {
81
+ "epoch": 0.27,
82
+ "eval_avg_sts": 0.8292742145946509,
83
+ "eval_sickr_spearman": 0.8081889402119113,
84
+ "eval_stsb_spearman": 0.8503594889773904,
85
+ "step": 275
86
+ },
87
+ {
88
+ "epoch": 0.29,
89
+ "eval_avg_sts": 0.8302783515064169,
90
+ "eval_sickr_spearman": 0.8108542340618137,
91
+ "eval_stsb_spearman": 0.8497024689510201,
92
+ "step": 300
93
+ },
94
+ {
95
+ "epoch": 0.32,
96
+ "eval_avg_sts": 0.8354548973044524,
97
+ "eval_sickr_spearman": 0.8162711816820041,
98
+ "eval_stsb_spearman": 0.8546386129269009,
99
+ "step": 325
100
+ },
101
+ {
102
+ "epoch": 0.34,
103
+ "eval_avg_sts": 0.8376245074641093,
104
+ "eval_sickr_spearman": 0.820640234762119,
105
+ "eval_stsb_spearman": 0.8546087801660995,
106
+ "step": 350
107
+ },
108
+ {
109
+ "epoch": 0.37,
110
+ "eval_avg_sts": 0.842346577377938,
111
+ "eval_sickr_spearman": 0.8251871470108594,
112
+ "eval_stsb_spearman": 0.8595060077450167,
113
+ "step": 375
114
+ },
115
+ {
116
+ "epoch": 0.39,
117
+ "eval_avg_sts": 0.8392487017993179,
118
+ "eval_sickr_spearman": 0.8222254052028811,
119
+ "eval_stsb_spearman": 0.8562719983957546,
120
+ "step": 400
121
+ },
122
+ {
123
+ "epoch": 0.41,
124
+ "eval_avg_sts": 0.836585408133989,
125
+ "eval_sickr_spearman": 0.8178774858073988,
126
+ "eval_stsb_spearman": 0.855293330460579,
127
+ "step": 425
128
+ },
129
+ {
130
+ "epoch": 0.44,
131
+ "eval_avg_sts": 0.8377417996341128,
132
+ "eval_sickr_spearman": 0.8186622659737961,
133
+ "eval_stsb_spearman": 0.8568213332944294,
134
+ "step": 450
135
+ },
136
+ {
137
+ "epoch": 0.46,
138
+ "eval_avg_sts": 0.8409718734645293,
139
+ "eval_sickr_spearman": 0.8244389665437579,
140
+ "eval_stsb_spearman": 0.8575047803853006,
141
+ "step": 475
142
+ },
143
+ {
144
+ "epoch": 0.49,
145
+ "learning_rate": 8.377150275884453e-06,
146
+ "loss": 0.5144,
147
+ "step": 500
148
+ },
149
+ {
150
+ "epoch": 0.49,
151
+ "eval_avg_sts": 0.8409672497360382,
152
+ "eval_sickr_spearman": 0.8242925677465746,
153
+ "eval_stsb_spearman": 0.8576419317255018,
154
+ "step": 500
155
+ },
156
+ {
157
+ "epoch": 0.51,
158
+ "eval_avg_sts": 0.8413119218825833,
159
+ "eval_sickr_spearman": 0.8241660538253865,
160
+ "eval_stsb_spearman": 0.85845778993978,
161
+ "step": 525
162
+ },
163
+ {
164
+ "epoch": 0.54,
165
+ "eval_avg_sts": 0.8405090163812534,
166
+ "eval_sickr_spearman": 0.8231903980618742,
167
+ "eval_stsb_spearman": 0.8578276347006326,
168
+ "step": 550
169
+ },
170
+ {
171
+ "epoch": 0.56,
172
+ "eval_avg_sts": 0.8461025764167975,
173
+ "eval_sickr_spearman": 0.8310040496127249,
174
+ "eval_stsb_spearman": 0.8612011032208701,
175
+ "step": 575
176
+ },
177
+ {
178
+ "epoch": 0.58,
179
+ "eval_avg_sts": 0.8447104900811846,
180
+ "eval_sickr_spearman": 0.8284478343941886,
181
+ "eval_stsb_spearman": 0.8609731457681805,
182
+ "step": 600
183
+ },
184
+ {
185
+ "epoch": 0.61,
186
+ "eval_avg_sts": 0.8462395445603483,
187
+ "eval_sickr_spearman": 0.8308051047905681,
188
+ "eval_stsb_spearman": 0.8616739843301284,
189
+ "step": 625
190
+ },
191
+ {
192
+ "epoch": 0.63,
193
+ "eval_avg_sts": 0.8436677522914944,
194
+ "eval_sickr_spearman": 0.8276286639591616,
195
+ "eval_stsb_spearman": 0.8597068406238272,
196
+ "step": 650
197
+ },
198
+ {
199
+ "epoch": 0.66,
200
+ "eval_avg_sts": 0.8447115186169327,
201
+ "eval_sickr_spearman": 0.8297105720509965,
202
+ "eval_stsb_spearman": 0.8597124651828688,
203
+ "step": 675
204
+ },
205
+ {
206
+ "epoch": 0.68,
207
+ "eval_avg_sts": 0.8434972279862564,
208
+ "eval_sickr_spearman": 0.8294735385653995,
209
+ "eval_stsb_spearman": 0.8575209174071132,
210
+ "step": 700
211
+ },
212
+ {
213
+ "epoch": 0.71,
214
+ "eval_avg_sts": 0.8440935405127372,
215
+ "eval_sickr_spearman": 0.8307528469522036,
216
+ "eval_stsb_spearman": 0.8574342340732707,
217
+ "step": 725
218
+ },
219
+ {
220
+ "epoch": 0.73,
221
+ "eval_avg_sts": 0.8430030266304707,
222
+ "eval_sickr_spearman": 0.8273284695751733,
223
+ "eval_stsb_spearman": 0.858677583685768,
224
+ "step": 750
225
+ },
226
+ {
227
+ "epoch": 0.75,
228
+ "eval_avg_sts": 0.8459169389588843,
229
+ "eval_sickr_spearman": 0.8317631811709543,
230
+ "eval_stsb_spearman": 0.8600706967468144,
231
+ "step": 775
232
+ },
233
+ {
234
+ "epoch": 0.78,
235
+ "eval_avg_sts": 0.8451542594549106,
236
+ "eval_sickr_spearman": 0.8302367527672511,
237
+ "eval_stsb_spearman": 0.8600717661425702,
238
+ "step": 800
239
+ },
240
+ {
241
+ "epoch": 0.8,
242
+ "eval_avg_sts": 0.8441819324586433,
243
+ "eval_sickr_spearman": 0.8280832303031719,
244
+ "eval_stsb_spearman": 0.8602806346141146,
245
+ "step": 825
246
+ },
247
+ {
248
+ "epoch": 0.83,
249
+ "eval_avg_sts": 0.84041329364274,
250
+ "eval_sickr_spearman": 0.8234209671109596,
251
+ "eval_stsb_spearman": 0.8574056201745204,
252
+ "step": 850
253
+ },
254
+ {
255
+ "epoch": 0.85,
256
+ "eval_avg_sts": 0.8463098863081795,
257
+ "eval_sickr_spearman": 0.8311718222500483,
258
+ "eval_stsb_spearman": 0.8614479503663108,
259
+ "step": 875
260
+ },
261
+ {
262
+ "epoch": 0.88,
263
+ "eval_avg_sts": 0.8489707536403015,
264
+ "eval_sickr_spearman": 0.8359545191757484,
265
+ "eval_stsb_spearman": 0.8619869881048546,
266
+ "step": 900
267
+ },
268
+ {
269
+ "epoch": 0.9,
270
+ "eval_avg_sts": 0.8459381219997153,
271
+ "eval_sickr_spearman": 0.8324996900804063,
272
+ "eval_stsb_spearman": 0.8593765539190245,
273
+ "step": 925
274
+ },
275
+ {
276
+ "epoch": 0.93,
277
+ "eval_avg_sts": 0.8441563920982813,
278
+ "eval_sickr_spearman": 0.8308032796087136,
279
+ "eval_stsb_spearman": 0.8575095045878491,
280
+ "step": 950
281
+ },
282
+ {
283
+ "epoch": 0.95,
284
+ "eval_avg_sts": 0.8417274944195297,
285
+ "eval_sickr_spearman": 0.8286961551886236,
286
+ "eval_stsb_spearman": 0.8547588336504358,
287
+ "step": 975
288
+ },
289
+ {
290
+ "epoch": 0.97,
291
+ "learning_rate": 6.754300551768907e-06,
292
+ "loss": 0.338,
293
+ "step": 1000
294
+ },
295
+ {
296
+ "epoch": 0.97,
297
+ "eval_avg_sts": 0.8439797180911035,
298
+ "eval_sickr_spearman": 0.8322901784159331,
299
+ "eval_stsb_spearman": 0.8556692577662738,
300
+ "step": 1000
301
+ },
302
+ {
303
+ "epoch": 1.0,
304
+ "eval_avg_sts": 0.846580279698808,
305
+ "eval_sickr_spearman": 0.8351822751268263,
306
+ "eval_stsb_spearman": 0.8579782842707896,
307
+ "step": 1025
308
+ },
309
+ {
310
+ "epoch": 1.02,
311
+ "eval_avg_sts": 0.8444154454958996,
312
+ "eval_sickr_spearman": 0.8319871021658588,
313
+ "eval_stsb_spearman": 0.8568437888259406,
314
+ "step": 1050
315
+ },
316
+ {
317
+ "epoch": 1.05,
318
+ "eval_avg_sts": 0.8432409670453203,
319
+ "eval_sickr_spearman": 0.8314499703584763,
320
+ "eval_stsb_spearman": 0.8550319637321643,
321
+ "step": 1075
322
+ },
323
+ {
324
+ "epoch": 1.07,
325
+ "eval_avg_sts": 0.8446753892080279,
326
+ "eval_sickr_spearman": 0.8331656893329471,
327
+ "eval_stsb_spearman": 0.8561850890831085,
328
+ "step": 1100
329
+ },
330
+ {
331
+ "epoch": 1.1,
332
+ "eval_avg_sts": 0.8474510190292008,
333
+ "eval_sickr_spearman": 0.8356278596548676,
334
+ "eval_stsb_spearman": 0.8592741784035342,
335
+ "step": 1125
336
+ },
337
+ {
338
+ "epoch": 1.12,
339
+ "eval_avg_sts": 0.8505734569059473,
340
+ "eval_sickr_spearman": 0.8389641479918606,
341
+ "eval_stsb_spearman": 0.862182765820034,
342
+ "step": 1150
343
+ },
344
+ {
345
+ "epoch": 1.14,
346
+ "eval_avg_sts": 0.8521021908496921,
347
+ "eval_sickr_spearman": 0.8412426954129831,
348
+ "eval_stsb_spearman": 0.862961686286401,
349
+ "step": 1175
350
+ },
351
+ {
352
+ "epoch": 1.17,
353
+ "eval_avg_sts": 0.8514560490494238,
354
+ "eval_sickr_spearman": 0.840301718104709,
355
+ "eval_stsb_spearman": 0.8626103799941387,
356
+ "step": 1200
357
+ },
358
+ {
359
+ "epoch": 1.19,
360
+ "eval_avg_sts": 0.8486972541916555,
361
+ "eval_sickr_spearman": 0.8367856973861348,
362
+ "eval_stsb_spearman": 0.8606088109971762,
363
+ "step": 1225
364
+ },
365
+ {
366
+ "epoch": 1.22,
367
+ "eval_avg_sts": 0.848898608926278,
368
+ "eval_sickr_spearman": 0.8370826736863266,
369
+ "eval_stsb_spearman": 0.8607145441662293,
370
+ "step": 1250
371
+ },
372
+ {
373
+ "epoch": 1.24,
374
+ "eval_avg_sts": 0.8454701656945296,
375
+ "eval_sickr_spearman": 0.8321730305595257,
376
+ "eval_stsb_spearman": 0.8587673008295333,
377
+ "step": 1275
378
+ },
379
+ {
380
+ "epoch": 1.27,
381
+ "eval_avg_sts": 0.8452672490522616,
382
+ "eval_sickr_spearman": 0.833004064676608,
383
+ "eval_stsb_spearman": 0.8575304334279151,
384
+ "step": 1300
385
+ },
386
+ {
387
+ "epoch": 1.29,
388
+ "eval_avg_sts": 0.8489039116599046,
389
+ "eval_sickr_spearman": 0.8371614927237867,
390
+ "eval_stsb_spearman": 0.8606463305960226,
391
+ "step": 1325
392
+ },
393
+ {
394
+ "epoch": 1.31,
395
+ "eval_avg_sts": 0.8467466937976185,
396
+ "eval_sickr_spearman": 0.8343573409596268,
397
+ "eval_stsb_spearman": 0.85913604663561,
398
+ "step": 1350
399
+ },
400
+ {
401
+ "epoch": 1.34,
402
+ "eval_avg_sts": 0.8479786014484607,
403
+ "eval_sickr_spearman": 0.8356114810492773,
404
+ "eval_stsb_spearman": 0.8603457218476441,
405
+ "step": 1375
406
+ },
407
+ {
408
+ "epoch": 1.36,
409
+ "eval_avg_sts": 0.8481320629670138,
410
+ "eval_sickr_spearman": 0.8365469828119875,
411
+ "eval_stsb_spearman": 0.8597171431220402,
412
+ "step": 1400
413
+ },
414
+ {
415
+ "epoch": 1.39,
416
+ "eval_avg_sts": 0.8493017107452288,
417
+ "eval_sickr_spearman": 0.8375811404570515,
418
+ "eval_stsb_spearman": 0.8610222810334061,
419
+ "step": 1425
420
+ },
421
+ {
422
+ "epoch": 1.41,
423
+ "eval_avg_sts": 0.8515707185583519,
424
+ "eval_sickr_spearman": 0.8413120523234598,
425
+ "eval_stsb_spearman": 0.8618293847932439,
426
+ "step": 1450
427
+ },
428
+ {
429
+ "epoch": 1.44,
430
+ "eval_avg_sts": 0.8491061717045717,
431
+ "eval_sickr_spearman": 0.8386661630385385,
432
+ "eval_stsb_spearman": 0.8595461803706049,
433
+ "step": 1475
434
+ },
435
+ {
436
+ "epoch": 1.46,
437
+ "learning_rate": 5.131450827653359e-06,
438
+ "loss": 0.2965,
439
+ "step": 1500
440
+ },
441
+ {
442
+ "epoch": 1.46,
443
+ "eval_avg_sts": 0.849951123911407,
444
+ "eval_sickr_spearman": 0.8403214588874001,
445
+ "eval_stsb_spearman": 0.859580788935414,
446
+ "step": 1500
447
+ },
448
+ {
449
+ "epoch": 1.48,
450
+ "eval_avg_sts": 0.8462728434267617,
451
+ "eval_sickr_spearman": 0.8349335220524785,
452
+ "eval_stsb_spearman": 0.8576121648010449,
453
+ "step": 1525
454
+ },
455
+ {
456
+ "epoch": 1.51,
457
+ "eval_avg_sts": 0.8478142980726532,
458
+ "eval_sickr_spearman": 0.8355811253931684,
459
+ "eval_stsb_spearman": 0.8600474707521379,
460
+ "step": 1550
461
+ },
462
+ {
463
+ "epoch": 1.53,
464
+ "eval_avg_sts": 0.8506859270446779,
465
+ "eval_sickr_spearman": 0.8387632819256463,
466
+ "eval_stsb_spearman": 0.8626085721637096,
467
+ "step": 1575
468
+ },
469
+ {
470
+ "epoch": 1.56,
471
+ "eval_avg_sts": 0.8491205822275252,
472
+ "eval_sickr_spearman": 0.8368930468978489,
473
+ "eval_stsb_spearman": 0.8613481175572015,
474
+ "step": 1600
475
+ },
476
+ {
477
+ "epoch": 1.58,
478
+ "eval_avg_sts": 0.8460304537726628,
479
+ "eval_sickr_spearman": 0.8335511389219881,
480
+ "eval_stsb_spearman": 0.8585097686233377,
481
+ "step": 1625
482
+ },
483
+ {
484
+ "epoch": 1.61,
485
+ "eval_avg_sts": 0.8488092782108085,
486
+ "eval_sickr_spearman": 0.8360739244939235,
487
+ "eval_stsb_spearman": 0.8615446319276937,
488
+ "step": 1650
489
+ },
490
+ {
491
+ "epoch": 1.63,
492
+ "eval_avg_sts": 0.8483771836085118,
493
+ "eval_sickr_spearman": 0.8353614791662919,
494
+ "eval_stsb_spearman": 0.8613928880507317,
495
+ "step": 1675
496
+ },
497
+ {
498
+ "epoch": 1.66,
499
+ "eval_avg_sts": 0.8482633646541247,
500
+ "eval_sickr_spearman": 0.835166232738946,
501
+ "eval_stsb_spearman": 0.8613604965693032,
502
+ "step": 1700
503
+ },
504
+ {
505
+ "epoch": 1.68,
506
+ "eval_avg_sts": 0.8494820125153382,
507
+ "eval_sickr_spearman": 0.8363321877263562,
508
+ "eval_stsb_spearman": 0.8626318373043202,
509
+ "step": 1725
510
+ },
511
+ {
512
+ "epoch": 1.7,
513
+ "eval_avg_sts": 0.84774811911999,
514
+ "eval_sickr_spearman": 0.8350676729187949,
515
+ "eval_stsb_spearman": 0.8604285653211853,
516
+ "step": 1750
517
+ },
518
+ {
519
+ "epoch": 1.73,
520
+ "eval_avg_sts": 0.8508953589183318,
521
+ "eval_sickr_spearman": 0.8393342276284411,
522
+ "eval_stsb_spearman": 0.8624564902082225,
523
+ "step": 1775
524
+ },
525
+ {
526
+ "epoch": 1.75,
527
+ "eval_avg_sts": 0.8497877526052855,
528
+ "eval_sickr_spearman": 0.8380021810922581,
529
+ "eval_stsb_spearman": 0.861573324118313,
530
+ "step": 1800
531
+ },
532
+ {
533
+ "epoch": 1.78,
534
+ "eval_avg_sts": 0.847915612163006,
535
+ "eval_sickr_spearman": 0.8351774720166826,
536
+ "eval_stsb_spearman": 0.8606537523093295,
537
+ "step": 1825
538
+ },
539
+ {
540
+ "epoch": 1.8,
541
+ "eval_avg_sts": 0.8496201121858785,
542
+ "eval_sickr_spearman": 0.8377723042407751,
543
+ "eval_stsb_spearman": 0.861467920130982,
544
+ "step": 1850
545
+ },
546
+ {
547
+ "epoch": 1.83,
548
+ "eval_avg_sts": 0.8506282266864784,
549
+ "eval_sickr_spearman": 0.8396311078664301,
550
+ "eval_stsb_spearman": 0.8616253455065267,
551
+ "step": 1875
552
+ },
553
+ {
554
+ "epoch": 1.85,
555
+ "eval_avg_sts": 0.8507148324349076,
556
+ "eval_sickr_spearman": 0.8398456147654526,
557
+ "eval_stsb_spearman": 0.8615840501043628,
558
+ "step": 1900
559
+ },
560
+ {
561
+ "epoch": 1.87,
562
+ "eval_avg_sts": 0.8528364235468135,
563
+ "eval_sickr_spearman": 0.8424179684340722,
564
+ "eval_stsb_spearman": 0.8632548786595547,
565
+ "step": 1925
566
+ },
567
+ {
568
+ "epoch": 1.9,
569
+ "eval_avg_sts": 0.8505338171378869,
570
+ "eval_sickr_spearman": 0.8398606965313044,
571
+ "eval_stsb_spearman": 0.8612069377444695,
572
+ "step": 1950
573
+ },
574
+ {
575
+ "epoch": 1.92,
576
+ "eval_avg_sts": 0.8481722210917726,
577
+ "eval_sickr_spearman": 0.8373791216444029,
578
+ "eval_stsb_spearman": 0.8589653205391424,
579
+ "step": 1975
580
+ },
581
+ {
582
+ "epoch": 1.95,
583
+ "learning_rate": 3.5086011035378126e-06,
584
+ "loss": 0.284,
585
+ "step": 2000
586
+ },
587
+ {
588
+ "epoch": 1.95,
589
+ "eval_avg_sts": 0.8482272752573701,
590
+ "eval_sickr_spearman": 0.8377695664679932,
591
+ "eval_stsb_spearman": 0.858684984046747,
592
+ "step": 2000
593
+ },
594
+ {
595
+ "epoch": 1.97,
596
+ "eval_avg_sts": 0.8480501028062337,
597
+ "eval_sickr_spearman": 0.837576385378009,
598
+ "eval_stsb_spearman": 0.8585238202344584,
599
+ "step": 2025
600
+ },
601
+ {
602
+ "epoch": 2.0,
603
+ "eval_avg_sts": 0.8480832424004543,
604
+ "eval_sickr_spearman": 0.8374597658637174,
605
+ "eval_stsb_spearman": 0.8587067189371913,
606
+ "step": 2050
607
+ },
608
+ {
609
+ "epoch": 2.02,
610
+ "eval_avg_sts": 0.8499478012984727,
611
+ "eval_sickr_spearman": 0.8390217853135862,
612
+ "eval_stsb_spearman": 0.8608738172833591,
613
+ "step": 2075
614
+ },
615
+ {
616
+ "epoch": 2.04,
617
+ "eval_avg_sts": 0.8491869466527012,
618
+ "eval_sickr_spearman": 0.8378290289715736,
619
+ "eval_stsb_spearman": 0.8605448643338288,
620
+ "step": 2100
621
+ },
622
+ {
623
+ "epoch": 2.07,
624
+ "eval_avg_sts": 0.8477584257424788,
625
+ "eval_sickr_spearman": 0.8359002920622246,
626
+ "eval_stsb_spearman": 0.8596165594227332,
627
+ "step": 2125
628
+ },
629
+ {
630
+ "epoch": 2.09,
631
+ "eval_avg_sts": 0.848914705769936,
632
+ "eval_sickr_spearman": 0.837553906822536,
633
+ "eval_stsb_spearman": 0.860275504717336,
634
+ "step": 2150
635
+ },
636
+ {
637
+ "epoch": 2.12,
638
+ "eval_avg_sts": 0.849736568959095,
639
+ "eval_sickr_spearman": 0.8393637187247242,
640
+ "eval_stsb_spearman": 0.860109419193466,
641
+ "step": 2175
642
+ },
643
+ {
644
+ "epoch": 2.14,
645
+ "eval_avg_sts": 0.8495688115163362,
646
+ "eval_sickr_spearman": 0.8389394600057214,
647
+ "eval_stsb_spearman": 0.860198163026951,
648
+ "step": 2200
649
+ },
650
+ {
651
+ "epoch": 2.17,
652
+ "eval_avg_sts": 0.8483895280681955,
653
+ "eval_sickr_spearman": 0.8374167780279301,
654
+ "eval_stsb_spearman": 0.8593622781084608,
655
+ "step": 2225
656
+ },
657
+ {
658
+ "epoch": 2.19,
659
+ "eval_avg_sts": 0.8490834154561102,
660
+ "eval_sickr_spearman": 0.8387396506237387,
661
+ "eval_stsb_spearman": 0.8594271802884818,
662
+ "step": 2250
663
+ },
664
+ {
665
+ "epoch": 2.22,
666
+ "eval_avg_sts": 0.8485035624729105,
667
+ "eval_sickr_spearman": 0.8381763418660725,
668
+ "eval_stsb_spearman": 0.8588307830797486,
669
+ "step": 2275
670
+ },
671
+ {
672
+ "epoch": 2.24,
673
+ "eval_avg_sts": 0.8476220200458998,
674
+ "eval_sickr_spearman": 0.8364356947499554,
675
+ "eval_stsb_spearman": 0.8588083453418442,
676
+ "step": 2300
677
+ },
678
+ {
679
+ "epoch": 2.26,
680
+ "eval_avg_sts": 0.8471502937152022,
681
+ "eval_sickr_spearman": 0.8358850662030688,
682
+ "eval_stsb_spearman": 0.8584155212273358,
683
+ "step": 2325
684
+ },
685
+ {
686
+ "epoch": 2.29,
687
+ "eval_avg_sts": 0.8481508305259742,
688
+ "eval_sickr_spearman": 0.8369382921954037,
689
+ "eval_stsb_spearman": 0.8593633688565446,
690
+ "step": 2350
691
+ },
692
+ {
693
+ "epoch": 2.31,
694
+ "eval_avg_sts": 0.8478439833733975,
695
+ "eval_sickr_spearman": 0.8367764273835573,
696
+ "eval_stsb_spearman": 0.8589115393632376,
697
+ "step": 2375
698
+ },
699
+ {
700
+ "epoch": 2.34,
701
+ "eval_avg_sts": 0.8466942437126351,
702
+ "eval_sickr_spearman": 0.8354506248905611,
703
+ "eval_stsb_spearman": 0.8579378625347092,
704
+ "step": 2400
705
+ },
706
+ {
707
+ "epoch": 2.36,
708
+ "eval_avg_sts": 0.8477441088420843,
709
+ "eval_sickr_spearman": 0.8366948705733154,
710
+ "eval_stsb_spearman": 0.8587933471108533,
711
+ "step": 2425
712
+ },
713
+ {
714
+ "epoch": 2.39,
715
+ "eval_avg_sts": 0.8488516946426614,
716
+ "eval_sickr_spearman": 0.8377335431419145,
717
+ "eval_stsb_spearman": 0.8599698461434083,
718
+ "step": 2450
719
+ },
720
+ {
721
+ "epoch": 2.41,
722
+ "eval_avg_sts": 0.8480771808294114,
723
+ "eval_sickr_spearman": 0.8370132207136473,
724
+ "eval_stsb_spearman": 0.8591411409451756,
725
+ "step": 2475
726
+ },
727
+ {
728
+ "epoch": 2.43,
729
+ "learning_rate": 1.8857513794222658e-06,
730
+ "loss": 0.2655,
731
+ "step": 2500
732
+ },
733
+ {
734
+ "epoch": 2.43,
735
+ "eval_avg_sts": 0.8487999481558521,
736
+ "eval_sickr_spearman": 0.8377923812411763,
737
+ "eval_stsb_spearman": 0.8598075150705279,
738
+ "step": 2500
739
+ },
740
+ {
741
+ "epoch": 2.46,
742
+ "eval_avg_sts": 0.848399750999945,
743
+ "eval_sickr_spearman": 0.8369205206878717,
744
+ "eval_stsb_spearman": 0.8598789813120182,
745
+ "step": 2525
746
+ },
747
+ {
748
+ "epoch": 2.48,
749
+ "eval_avg_sts": 0.8485584973593749,
750
+ "eval_sickr_spearman": 0.8373407447943537,
751
+ "eval_stsb_spearman": 0.8597762499243962,
752
+ "step": 2550
753
+ },
754
+ {
755
+ "epoch": 2.51,
756
+ "eval_avg_sts": 0.8484553378772859,
757
+ "eval_sickr_spearman": 0.8371878137673748,
758
+ "eval_stsb_spearman": 0.8597228619871968,
759
+ "step": 2575
760
+ },
761
+ {
762
+ "epoch": 2.53,
763
+ "eval_avg_sts": 0.8480636244098692,
764
+ "eval_sickr_spearman": 0.8368399244996585,
765
+ "eval_stsb_spearman": 0.8592873243200797,
766
+ "step": 2600
767
+ },
768
+ {
769
+ "epoch": 2.56,
770
+ "eval_avg_sts": 0.8487639345972604,
771
+ "eval_sickr_spearman": 0.8379995874127802,
772
+ "eval_stsb_spearman": 0.8595282817817406,
773
+ "step": 2625
774
+ },
775
+ {
776
+ "epoch": 2.58,
777
+ "eval_avg_sts": 0.8484073121905233,
778
+ "eval_sickr_spearman": 0.8373334920980365,
779
+ "eval_stsb_spearman": 0.85948113228301,
780
+ "step": 2650
781
+ },
782
+ {
783
+ "epoch": 2.6,
784
+ "eval_avg_sts": 0.8495353514397407,
785
+ "eval_sickr_spearman": 0.8388814384351839,
786
+ "eval_stsb_spearman": 0.8601892644442974,
787
+ "step": 2675
788
+ },
789
+ {
790
+ "epoch": 2.63,
791
+ "eval_avg_sts": 0.8488734860883285,
792
+ "eval_sickr_spearman": 0.8376559248819906,
793
+ "eval_stsb_spearman": 0.8600910472946663,
794
+ "step": 2700
795
+ },
796
+ {
797
+ "epoch": 2.65,
798
+ "eval_avg_sts": 0.8485291479370792,
799
+ "eval_sickr_spearman": 0.8370302717546578,
800
+ "eval_stsb_spearman": 0.8600280241195006,
801
+ "step": 2725
802
+ },
803
+ {
804
+ "epoch": 2.68,
805
+ "eval_avg_sts": 0.8484609936724099,
806
+ "eval_sickr_spearman": 0.8370401181304525,
807
+ "eval_stsb_spearman": 0.8598818692143673,
808
+ "step": 2750
809
+ },
810
+ {
811
+ "epoch": 2.7,
812
+ "eval_avg_sts": 0.8487467947632423,
813
+ "eval_sickr_spearman": 0.8374640886628467,
814
+ "eval_stsb_spearman": 0.8600295008636379,
815
+ "step": 2775
816
+ },
817
+ {
818
+ "epoch": 2.73,
819
+ "eval_avg_sts": 0.8487549703295775,
820
+ "eval_sickr_spearman": 0.8377412761492462,
821
+ "eval_stsb_spearman": 0.8597686645099087,
822
+ "step": 2800
823
+ },
824
+ {
825
+ "epoch": 2.75,
826
+ "eval_avg_sts": 0.8483512050356906,
827
+ "eval_sickr_spearman": 0.8371601478529463,
828
+ "eval_stsb_spearman": 0.8595422622184349,
829
+ "step": 2825
830
+ },
831
+ {
832
+ "epoch": 2.78,
833
+ "eval_avg_sts": 0.8487416863586705,
834
+ "eval_sickr_spearman": 0.8376103433667258,
835
+ "eval_stsb_spearman": 0.8598730293506154,
836
+ "step": 2850
837
+ },
838
+ {
839
+ "epoch": 2.8,
840
+ "eval_avg_sts": 0.8486695192263061,
841
+ "eval_sickr_spearman": 0.8374840215699435,
842
+ "eval_stsb_spearman": 0.8598550168826687,
843
+ "step": 2875
844
+ },
845
+ {
846
+ "epoch": 2.82,
847
+ "eval_avg_sts": 0.8490062168713867,
848
+ "eval_sickr_spearman": 0.8380430075284804,
849
+ "eval_stsb_spearman": 0.859969426214293,
850
+ "step": 2900
851
+ },
852
+ {
853
+ "epoch": 2.85,
854
+ "eval_avg_sts": 0.8488936976734611,
855
+ "eval_sickr_spearman": 0.8378850332358503,
856
+ "eval_stsb_spearman": 0.859902362111072,
857
+ "step": 2925
858
+ },
859
+ {
860
+ "epoch": 2.87,
861
+ "eval_avg_sts": 0.8489709020581676,
862
+ "eval_sickr_spearman": 0.8379462248590827,
863
+ "eval_stsb_spearman": 0.8599955792572526,
864
+ "step": 2950
865
+ },
866
+ {
867
+ "epoch": 2.9,
868
+ "eval_avg_sts": 0.8487957508450565,
869
+ "eval_sickr_spearman": 0.8376585185614683,
870
+ "eval_stsb_spearman": 0.8599329831286449,
871
+ "step": 2975
872
+ },
873
+ {
874
+ "epoch": 2.92,
875
+ "learning_rate": 2.6290165530671865e-07,
876
+ "loss": 0.263,
877
+ "step": 3000
878
+ },
879
+ {
880
+ "epoch": 2.92,
881
+ "eval_avg_sts": 0.8485199174178477,
882
+ "eval_sickr_spearman": 0.837266008400516,
883
+ "eval_stsb_spearman": 0.8597738264351793,
884
+ "step": 3000
885
+ },
886
+ {
887
+ "epoch": 2.95,
888
+ "eval_avg_sts": 0.8483285540592784,
889
+ "eval_sickr_spearman": 0.837077006016357,
890
+ "eval_stsb_spearman": 0.8595801021021999,
891
+ "step": 3025
892
+ },
893
+ {
894
+ "epoch": 2.97,
895
+ "eval_avg_sts": 0.8483039392081567,
896
+ "eval_sickr_spearman": 0.8370466464338976,
897
+ "eval_stsb_spearman": 0.8595612319824159,
898
+ "step": 3050
899
+ },
900
+ {
901
+ "epoch": 2.99,
902
+ "eval_avg_sts": 0.8482931559093356,
903
+ "eval_sickr_spearman": 0.837029215070426,
904
+ "eval_stsb_spearman": 0.8595570967482453,
905
+ "step": 3075
906
+ },
907
+ {
908
+ "epoch": 3.0,
909
+ "step": 3081,
910
+ "train_runtime": 8699.7484,
911
+ "train_samples_per_second": 0.354
912
+ }
913
+ ],
914
+ "max_steps": 3081,
915
+ "num_train_epochs": 3,
916
+ "total_flos": 333879495592513536,
917
+ "trial_name": null,
918
+ "trial_params": null
919
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:864a98bc0117934b9f37e527ccfbfef287c1f6ba63f3578b5eafd7d7adeffcfe
3
+ size 2488
vocab.txt ADDED
The diff for this file is too large to render. See raw diff