qianjiaying commited on
Commit
9d88a96
·
1 Parent(s): 5abeb01
model/.DS_Store ADDED
Binary file (8.2 kB). View file
 
model/my-unsup-simcse-bert-base-uncased-0413/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForCL"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "transformers_version": "4.2.2",
21
+ "type_vocab_size": 2,
22
+ "use_cache": true,
23
+ "vocab_size": 30522
24
+ }
model/my-unsup-simcse-bert-base-uncased-0413/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fe0a42b6c6994f10fd145e4cc4141592b3d6f0ad929d39c3c44f9f7c6e8c5d1
3
+ size 875974661
model/my-unsup-simcse-bert-base-uncased-0413/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97cea0b156dd8b46dcdda7a4b9b9be70ca097d43b955fcc4770c6e00b3ec4c18
3
+ size 438014253
model/my-unsup-simcse-bert-base-uncased-0413/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2ede01415a15a84a2fcc3aa958983b10cddb77be0e10206cc7e73b7b95df800
3
+ size 627
model/my-unsup-simcse-bert-base-uncased-0413/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
model/my-unsup-simcse-bert-base-uncased-0413/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "name_or_path": "bert-base-uncased"}
model/my-unsup-simcse-bert-base-uncased-0413/trainer_state.json ADDED
@@ -0,0 +1,741 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5772300834060221,
3
+ "best_model_checkpoint": "result/my-unsup-simcse-bert-base-uncased-0413",
4
+ "epoch": 1.1541953054078589,
5
+ "global_step": 8900,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.01,
12
+ "eval_avg_sts": 0.43413417287570444,
13
+ "eval_sickr_spearman": 0.4582097432098862,
14
+ "eval_stsb_spearman": 0.4100586025415227,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 0.03,
19
+ "eval_avg_sts": 0.44202934238660074,
20
+ "eval_sickr_spearman": 0.46371670111417157,
21
+ "eval_stsb_spearman": 0.42034198365902986,
22
+ "step": 200
23
+ },
24
+ {
25
+ "epoch": 0.04,
26
+ "eval_avg_sts": 0.45392676705884744,
27
+ "eval_sickr_spearman": 0.4647000418539141,
28
+ "eval_stsb_spearman": 0.44315349226378076,
29
+ "step": 300
30
+ },
31
+ {
32
+ "epoch": 0.05,
33
+ "eval_avg_sts": 0.46705471818431266,
34
+ "eval_sickr_spearman": 0.4777946649767838,
35
+ "eval_stsb_spearman": 0.4563147713918415,
36
+ "step": 400
37
+ },
38
+ {
39
+ "epoch": 0.06,
40
+ "learning_rate": 9.783858557039727e-06,
41
+ "loss": 0.0626,
42
+ "step": 500
43
+ },
44
+ {
45
+ "epoch": 0.06,
46
+ "eval_avg_sts": 0.4629420855704067,
47
+ "eval_sickr_spearman": 0.472255862452245,
48
+ "eval_stsb_spearman": 0.4536283086885684,
49
+ "step": 500
50
+ },
51
+ {
52
+ "epoch": 0.08,
53
+ "eval_avg_sts": 0.4583178800945026,
54
+ "eval_sickr_spearman": 0.4685936350608932,
55
+ "eval_stsb_spearman": 0.4480421251281119,
56
+ "step": 600
57
+ },
58
+ {
59
+ "epoch": 0.09,
60
+ "eval_avg_sts": 0.46463274114411685,
61
+ "eval_sickr_spearman": 0.4777823209837142,
62
+ "eval_stsb_spearman": 0.4514831613045195,
63
+ "step": 700
64
+ },
65
+ {
66
+ "epoch": 0.1,
67
+ "eval_avg_sts": 0.4757360446446461,
68
+ "eval_sickr_spearman": 0.4900823655954934,
69
+ "eval_stsb_spearman": 0.46138972369379877,
70
+ "step": 800
71
+ },
72
+ {
73
+ "epoch": 0.12,
74
+ "eval_avg_sts": 0.4706008129820051,
75
+ "eval_sickr_spearman": 0.4842226672822465,
76
+ "eval_stsb_spearman": 0.45697895868176375,
77
+ "step": 900
78
+ },
79
+ {
80
+ "epoch": 0.13,
81
+ "learning_rate": 9.567717114079454e-06,
82
+ "loss": 0.006,
83
+ "step": 1000
84
+ },
85
+ {
86
+ "epoch": 0.13,
87
+ "eval_avg_sts": 0.4724716705172539,
88
+ "eval_sickr_spearman": 0.4847777627215667,
89
+ "eval_stsb_spearman": 0.4601655783129412,
90
+ "step": 1000
91
+ },
92
+ {
93
+ "epoch": 0.14,
94
+ "eval_avg_sts": 0.46868978609741097,
95
+ "eval_sickr_spearman": 0.4790444342362035,
96
+ "eval_stsb_spearman": 0.4583351379586184,
97
+ "step": 1100
98
+ },
99
+ {
100
+ "epoch": 0.16,
101
+ "eval_avg_sts": 0.4739309502400343,
102
+ "eval_sickr_spearman": 0.4857525539252532,
103
+ "eval_stsb_spearman": 0.46210934655481545,
104
+ "step": 1200
105
+ },
106
+ {
107
+ "epoch": 0.17,
108
+ "eval_avg_sts": 0.47321307094316606,
109
+ "eval_sickr_spearman": 0.4839474971021075,
110
+ "eval_stsb_spearman": 0.4624786447842246,
111
+ "step": 1300
112
+ },
113
+ {
114
+ "epoch": 0.18,
115
+ "eval_avg_sts": 0.476232403795022,
116
+ "eval_sickr_spearman": 0.4853778633029345,
117
+ "eval_stsb_spearman": 0.46708694428710956,
118
+ "step": 1400
119
+ },
120
+ {
121
+ "epoch": 0.19,
122
+ "learning_rate": 9.35157567111918e-06,
123
+ "loss": 0.0048,
124
+ "step": 1500
125
+ },
126
+ {
127
+ "epoch": 0.19,
128
+ "eval_avg_sts": 0.4671866652183253,
129
+ "eval_sickr_spearman": 0.47849875289276517,
130
+ "eval_stsb_spearman": 0.45587457754388544,
131
+ "step": 1500
132
+ },
133
+ {
134
+ "epoch": 0.21,
135
+ "eval_avg_sts": 0.4719700135797812,
136
+ "eval_sickr_spearman": 0.48377895596716125,
137
+ "eval_stsb_spearman": 0.46016107119240107,
138
+ "step": 1600
139
+ },
140
+ {
141
+ "epoch": 0.22,
142
+ "eval_avg_sts": 0.468405693803544,
143
+ "eval_sickr_spearman": 0.47872507544274157,
144
+ "eval_stsb_spearman": 0.45808631216434653,
145
+ "step": 1700
146
+ },
147
+ {
148
+ "epoch": 0.23,
149
+ "eval_avg_sts": 0.4693147295623183,
150
+ "eval_sickr_spearman": 0.4841435120270765,
151
+ "eval_stsb_spearman": 0.4544859470975601,
152
+ "step": 1800
153
+ },
154
+ {
155
+ "epoch": 0.25,
156
+ "eval_avg_sts": 0.4756334603907184,
157
+ "eval_sickr_spearman": 0.4869564054516981,
158
+ "eval_stsb_spearman": 0.46431051532973877,
159
+ "step": 1900
160
+ },
161
+ {
162
+ "epoch": 0.26,
163
+ "learning_rate": 9.135434228158908e-06,
164
+ "loss": 0.0039,
165
+ "step": 2000
166
+ },
167
+ {
168
+ "epoch": 0.26,
169
+ "eval_avg_sts": 0.47879461277752494,
170
+ "eval_sickr_spearman": 0.4944859050064409,
171
+ "eval_stsb_spearman": 0.463103320548609,
172
+ "step": 2000
173
+ },
174
+ {
175
+ "epoch": 0.27,
176
+ "eval_avg_sts": 0.4919587799046121,
177
+ "eval_sickr_spearman": 0.50618844271633,
178
+ "eval_stsb_spearman": 0.47772911709289423,
179
+ "step": 2100
180
+ },
181
+ {
182
+ "epoch": 0.29,
183
+ "eval_avg_sts": 0.4920605376671945,
184
+ "eval_sickr_spearman": 0.5040317021684543,
185
+ "eval_stsb_spearman": 0.48008937316593464,
186
+ "step": 2200
187
+ },
188
+ {
189
+ "epoch": 0.3,
190
+ "eval_avg_sts": 0.5061405516094162,
191
+ "eval_sickr_spearman": 0.515380875065063,
192
+ "eval_stsb_spearman": 0.4969002281537693,
193
+ "step": 2300
194
+ },
195
+ {
196
+ "epoch": 0.31,
197
+ "eval_avg_sts": 0.49937347650708863,
198
+ "eval_sickr_spearman": 0.5125618336594574,
199
+ "eval_stsb_spearman": 0.48618511935471986,
200
+ "step": 2400
201
+ },
202
+ {
203
+ "epoch": 0.32,
204
+ "learning_rate": 8.919292785198635e-06,
205
+ "loss": 0.0036,
206
+ "step": 2500
207
+ },
208
+ {
209
+ "epoch": 0.32,
210
+ "eval_avg_sts": 0.5097962418074036,
211
+ "eval_sickr_spearman": 0.5162478844771226,
212
+ "eval_stsb_spearman": 0.5033445991376845,
213
+ "step": 2500
214
+ },
215
+ {
216
+ "epoch": 0.34,
217
+ "eval_avg_sts": 0.5038287826668915,
218
+ "eval_sickr_spearman": 0.5139890778386909,
219
+ "eval_stsb_spearman": 0.4936684874950922,
220
+ "step": 2600
221
+ },
222
+ {
223
+ "epoch": 0.35,
224
+ "eval_avg_sts": 0.4876245229950539,
225
+ "eval_sickr_spearman": 0.503567481573055,
226
+ "eval_stsb_spearman": 0.4716815644170527,
227
+ "step": 2700
228
+ },
229
+ {
230
+ "epoch": 0.36,
231
+ "eval_avg_sts": 0.48852558904470145,
232
+ "eval_sickr_spearman": 0.5041236336966068,
233
+ "eval_stsb_spearman": 0.472927544392796,
234
+ "step": 2800
235
+ },
236
+ {
237
+ "epoch": 0.38,
238
+ "eval_avg_sts": 0.4908887011823253,
239
+ "eval_sickr_spearman": 0.506046366718276,
240
+ "eval_stsb_spearman": 0.4757310356463747,
241
+ "step": 2900
242
+ },
243
+ {
244
+ "epoch": 0.39,
245
+ "learning_rate": 8.703151342238363e-06,
246
+ "loss": 0.0031,
247
+ "step": 3000
248
+ },
249
+ {
250
+ "epoch": 0.39,
251
+ "eval_avg_sts": 0.49885341513488746,
252
+ "eval_sickr_spearman": 0.5072558859146905,
253
+ "eval_stsb_spearman": 0.49045094435508446,
254
+ "step": 3000
255
+ },
256
+ {
257
+ "epoch": 0.4,
258
+ "eval_avg_sts": 0.5025732287061541,
259
+ "eval_sickr_spearman": 0.5157659404052926,
260
+ "eval_stsb_spearman": 0.4893805170070156,
261
+ "step": 3100
262
+ },
263
+ {
264
+ "epoch": 0.41,
265
+ "eval_avg_sts": 0.48566033454734003,
266
+ "eval_sickr_spearman": 0.5039478878964448,
267
+ "eval_stsb_spearman": 0.4673727811982352,
268
+ "step": 3200
269
+ },
270
+ {
271
+ "epoch": 0.43,
272
+ "eval_avg_sts": 0.47697364115308033,
273
+ "eval_sickr_spearman": 0.497832856247953,
274
+ "eval_stsb_spearman": 0.4561144260582077,
275
+ "step": 3300
276
+ },
277
+ {
278
+ "epoch": 0.44,
279
+ "eval_avg_sts": 0.4905299355880675,
280
+ "eval_sickr_spearman": 0.5084222731820136,
281
+ "eval_stsb_spearman": 0.4726375979941214,
282
+ "step": 3400
283
+ },
284
+ {
285
+ "epoch": 0.45,
286
+ "learning_rate": 8.487009899278088e-06,
287
+ "loss": 0.0026,
288
+ "step": 3500
289
+ },
290
+ {
291
+ "epoch": 0.45,
292
+ "eval_avg_sts": 0.4927153870008586,
293
+ "eval_sickr_spearman": 0.5115589442614296,
294
+ "eval_stsb_spearman": 0.4738718297402876,
295
+ "step": 3500
296
+ },
297
+ {
298
+ "epoch": 0.47,
299
+ "eval_avg_sts": 0.4990059371177902,
300
+ "eval_sickr_spearman": 0.5237218120475015,
301
+ "eval_stsb_spearman": 0.47429006218807884,
302
+ "step": 3600
303
+ },
304
+ {
305
+ "epoch": 0.48,
306
+ "eval_avg_sts": 0.5081240005291581,
307
+ "eval_sickr_spearman": 0.5338321187447169,
308
+ "eval_stsb_spearman": 0.4824158823135992,
309
+ "step": 3700
310
+ },
311
+ {
312
+ "epoch": 0.49,
313
+ "eval_avg_sts": 0.5053613131195629,
314
+ "eval_sickr_spearman": 0.5345850062597594,
315
+ "eval_stsb_spearman": 0.4761376199793665,
316
+ "step": 3800
317
+ },
318
+ {
319
+ "epoch": 0.51,
320
+ "eval_avg_sts": 0.5291853264881973,
321
+ "eval_sickr_spearman": 0.5583114097481583,
322
+ "eval_stsb_spearman": 0.5000592432282362,
323
+ "step": 3900
324
+ },
325
+ {
326
+ "epoch": 0.52,
327
+ "learning_rate": 8.270868456317816e-06,
328
+ "loss": 0.0043,
329
+ "step": 4000
330
+ },
331
+ {
332
+ "epoch": 0.52,
333
+ "eval_avg_sts": 0.5303577078371549,
334
+ "eval_sickr_spearman": 0.555212106865661,
335
+ "eval_stsb_spearman": 0.5055033088086488,
336
+ "step": 4000
337
+ },
338
+ {
339
+ "epoch": 0.53,
340
+ "eval_avg_sts": 0.5333661650982712,
341
+ "eval_sickr_spearman": 0.5605562393360713,
342
+ "eval_stsb_spearman": 0.506176090860471,
343
+ "step": 4100
344
+ },
345
+ {
346
+ "epoch": 0.54,
347
+ "eval_avg_sts": 0.5278812963738058,
348
+ "eval_sickr_spearman": 0.5576400310122562,
349
+ "eval_stsb_spearman": 0.49812256173535524,
350
+ "step": 4200
351
+ },
352
+ {
353
+ "epoch": 0.56,
354
+ "eval_avg_sts": 0.5228114162776825,
355
+ "eval_sickr_spearman": 0.549996457591598,
356
+ "eval_stsb_spearman": 0.495626374963767,
357
+ "step": 4300
358
+ },
359
+ {
360
+ "epoch": 0.57,
361
+ "eval_avg_sts": 0.5249527931181994,
362
+ "eval_sickr_spearman": 0.5511832580770323,
363
+ "eval_stsb_spearman": 0.4987223281593667,
364
+ "step": 4400
365
+ },
366
+ {
367
+ "epoch": 0.58,
368
+ "learning_rate": 8.054727013357542e-06,
369
+ "loss": 0.0024,
370
+ "step": 4500
371
+ },
372
+ {
373
+ "epoch": 0.58,
374
+ "eval_avg_sts": 0.5365941902025078,
375
+ "eval_sickr_spearman": 0.5552903014988024,
376
+ "eval_stsb_spearman": 0.5178980789062131,
377
+ "step": 4500
378
+ },
379
+ {
380
+ "epoch": 0.6,
381
+ "eval_avg_sts": 0.5347280260801861,
382
+ "eval_sickr_spearman": 0.5567227330369913,
383
+ "eval_stsb_spearman": 0.5127333191233809,
384
+ "step": 4600
385
+ },
386
+ {
387
+ "epoch": 0.61,
388
+ "eval_avg_sts": 0.5353743763935372,
389
+ "eval_sickr_spearman": 0.5572790772849489,
390
+ "eval_stsb_spearman": 0.5134696755021255,
391
+ "step": 4700
392
+ },
393
+ {
394
+ "epoch": 0.62,
395
+ "eval_avg_sts": 0.5372262235055488,
396
+ "eval_sickr_spearman": 0.557209576281168,
397
+ "eval_stsb_spearman": 0.5172428707299297,
398
+ "step": 4800
399
+ },
400
+ {
401
+ "epoch": 0.64,
402
+ "eval_avg_sts": 0.5392746843345577,
403
+ "eval_sickr_spearman": 0.5658935994211787,
404
+ "eval_stsb_spearman": 0.5126557692479369,
405
+ "step": 4900
406
+ },
407
+ {
408
+ "epoch": 0.65,
409
+ "learning_rate": 7.83858557039727e-06,
410
+ "loss": 0.0035,
411
+ "step": 5000
412
+ },
413
+ {
414
+ "epoch": 0.65,
415
+ "eval_avg_sts": 0.5381467627451588,
416
+ "eval_sickr_spearman": 0.5665912992006686,
417
+ "eval_stsb_spearman": 0.5097022262896491,
418
+ "step": 5000
419
+ },
420
+ {
421
+ "epoch": 0.66,
422
+ "eval_avg_sts": 0.5437598393325787,
423
+ "eval_sickr_spearman": 0.5716501269285366,
424
+ "eval_stsb_spearman": 0.5158695517366209,
425
+ "step": 5100
426
+ },
427
+ {
428
+ "epoch": 0.67,
429
+ "eval_avg_sts": 0.5458859184475997,
430
+ "eval_sickr_spearman": 0.5727973978174873,
431
+ "eval_stsb_spearman": 0.5189744390777121,
432
+ "step": 5200
433
+ },
434
+ {
435
+ "epoch": 0.69,
436
+ "eval_avg_sts": 0.5471436350834367,
437
+ "eval_sickr_spearman": 0.5750997686649232,
438
+ "eval_stsb_spearman": 0.5191875015019501,
439
+ "step": 5300
440
+ },
441
+ {
442
+ "epoch": 0.7,
443
+ "eval_avg_sts": 0.5362576150135143,
444
+ "eval_sickr_spearman": 0.5684171054596358,
445
+ "eval_stsb_spearman": 0.504098124567393,
446
+ "step": 5400
447
+ },
448
+ {
449
+ "epoch": 0.71,
450
+ "learning_rate": 7.622444127436995e-06,
451
+ "loss": 0.0021,
452
+ "step": 5500
453
+ },
454
+ {
455
+ "epoch": 0.71,
456
+ "eval_avg_sts": 0.5439797124533817,
457
+ "eval_sickr_spearman": 0.5719825982126912,
458
+ "eval_stsb_spearman": 0.5159768266940723,
459
+ "step": 5500
460
+ },
461
+ {
462
+ "epoch": 0.73,
463
+ "eval_avg_sts": 0.5336924618718794,
464
+ "eval_sickr_spearman": 0.5533836108650136,
465
+ "eval_stsb_spearman": 0.5140013128787451,
466
+ "step": 5600
467
+ },
468
+ {
469
+ "epoch": 0.74,
470
+ "eval_avg_sts": 0.5432756107135348,
471
+ "eval_sickr_spearman": 0.5664231903456354,
472
+ "eval_stsb_spearman": 0.5201280310814343,
473
+ "step": 5700
474
+ },
475
+ {
476
+ "epoch": 0.75,
477
+ "eval_avg_sts": 0.5362600896652556,
478
+ "eval_sickr_spearman": 0.5664119990990002,
479
+ "eval_stsb_spearman": 0.5061081802315109,
480
+ "step": 5800
481
+ },
482
+ {
483
+ "epoch": 0.77,
484
+ "eval_avg_sts": 0.5558055193441328,
485
+ "eval_sickr_spearman": 0.5845013764604193,
486
+ "eval_stsb_spearman": 0.5271096622278463,
487
+ "step": 5900
488
+ },
489
+ {
490
+ "epoch": 0.78,
491
+ "learning_rate": 7.406302684476722e-06,
492
+ "loss": 0.0029,
493
+ "step": 6000
494
+ },
495
+ {
496
+ "epoch": 0.78,
497
+ "eval_avg_sts": 0.5543715317449052,
498
+ "eval_sickr_spearman": 0.582216825151617,
499
+ "eval_stsb_spearman": 0.5265262383381932,
500
+ "step": 6000
501
+ },
502
+ {
503
+ "epoch": 0.79,
504
+ "eval_avg_sts": 0.5577833334850189,
505
+ "eval_sickr_spearman": 0.5861351063447354,
506
+ "eval_stsb_spearman": 0.5294315606253022,
507
+ "step": 6100
508
+ },
509
+ {
510
+ "epoch": 0.8,
511
+ "eval_avg_sts": 0.5459303241614681,
512
+ "eval_sickr_spearman": 0.5734108990761563,
513
+ "eval_stsb_spearman": 0.5184497492467801,
514
+ "step": 6200
515
+ },
516
+ {
517
+ "epoch": 0.82,
518
+ "eval_avg_sts": 0.5527974603364145,
519
+ "eval_sickr_spearman": 0.5769931546836136,
520
+ "eval_stsb_spearman": 0.5286017659892155,
521
+ "step": 6300
522
+ },
523
+ {
524
+ "epoch": 0.83,
525
+ "eval_avg_sts": 0.5613325719571378,
526
+ "eval_sickr_spearman": 0.5828151485822315,
527
+ "eval_stsb_spearman": 0.5398499953320443,
528
+ "step": 6400
529
+ },
530
+ {
531
+ "epoch": 0.84,
532
+ "learning_rate": 7.190161241516449e-06,
533
+ "loss": 0.0013,
534
+ "step": 6500
535
+ },
536
+ {
537
+ "epoch": 0.84,
538
+ "eval_avg_sts": 0.5706535000902035,
539
+ "eval_sickr_spearman": 0.5902679904990792,
540
+ "eval_stsb_spearman": 0.5510390096813279,
541
+ "step": 6500
542
+ },
543
+ {
544
+ "epoch": 0.86,
545
+ "eval_avg_sts": 0.5588971829128009,
546
+ "eval_sickr_spearman": 0.5792282339579348,
547
+ "eval_stsb_spearman": 0.5385661318676669,
548
+ "step": 6600
549
+ },
550
+ {
551
+ "epoch": 0.87,
552
+ "eval_avg_sts": 0.5467875739347965,
553
+ "eval_sickr_spearman": 0.5713499805756498,
554
+ "eval_stsb_spearman": 0.5222251672939431,
555
+ "step": 6700
556
+ },
557
+ {
558
+ "epoch": 0.88,
559
+ "eval_avg_sts": 0.5564259742453666,
560
+ "eval_sickr_spearman": 0.5743576881477043,
561
+ "eval_stsb_spearman": 0.5384942603430287,
562
+ "step": 6800
563
+ },
564
+ {
565
+ "epoch": 0.89,
566
+ "eval_avg_sts": 0.564484253954618,
567
+ "eval_sickr_spearman": 0.5855938918937308,
568
+ "eval_stsb_spearman": 0.5433746160155053,
569
+ "step": 6900
570
+ },
571
+ {
572
+ "epoch": 0.91,
573
+ "learning_rate": 6.974019798556175e-06,
574
+ "loss": 0.0046,
575
+ "step": 7000
576
+ },
577
+ {
578
+ "epoch": 0.91,
579
+ "eval_avg_sts": 0.5572693279756483,
580
+ "eval_sickr_spearman": 0.5816440542669675,
581
+ "eval_stsb_spearman": 0.5328946016843291,
582
+ "step": 7000
583
+ },
584
+ {
585
+ "epoch": 0.92,
586
+ "eval_avg_sts": 0.5564053626625001,
587
+ "eval_sickr_spearman": 0.5780550262742075,
588
+ "eval_stsb_spearman": 0.5347556990507928,
589
+ "step": 7100
590
+ },
591
+ {
592
+ "epoch": 0.93,
593
+ "eval_avg_sts": 0.5595700328398423,
594
+ "eval_sickr_spearman": 0.5775588169652502,
595
+ "eval_stsb_spearman": 0.5415812487144344,
596
+ "step": 7200
597
+ },
598
+ {
599
+ "epoch": 0.95,
600
+ "eval_avg_sts": 0.5507423483095826,
601
+ "eval_sickr_spearman": 0.5766743242322674,
602
+ "eval_stsb_spearman": 0.5248103723868978,
603
+ "step": 7300
604
+ },
605
+ {
606
+ "epoch": 0.96,
607
+ "eval_avg_sts": 0.5496315444386322,
608
+ "eval_sickr_spearman": 0.5782933085684419,
609
+ "eval_stsb_spearman": 0.5209697803088224,
610
+ "step": 7400
611
+ },
612
+ {
613
+ "epoch": 0.97,
614
+ "learning_rate": 6.7578783555959026e-06,
615
+ "loss": 0.0029,
616
+ "step": 7500
617
+ },
618
+ {
619
+ "epoch": 0.97,
620
+ "eval_avg_sts": 0.5654071055800436,
621
+ "eval_sickr_spearman": 0.5835302836515436,
622
+ "eval_stsb_spearman": 0.5472839275085436,
623
+ "step": 7500
624
+ },
625
+ {
626
+ "epoch": 0.99,
627
+ "eval_avg_sts": 0.5573712317131654,
628
+ "eval_sickr_spearman": 0.5770115986265659,
629
+ "eval_stsb_spearman": 0.5377308647997648,
630
+ "step": 7600
631
+ },
632
+ {
633
+ "epoch": 1.0,
634
+ "eval_avg_sts": 0.561124258264991,
635
+ "eval_sickr_spearman": 0.5816098080916421,
636
+ "eval_stsb_spearman": 0.5406387084383399,
637
+ "step": 7700
638
+ },
639
+ {
640
+ "epoch": 1.01,
641
+ "eval_avg_sts": 0.5704081137772914,
642
+ "eval_sickr_spearman": 0.592072951260022,
643
+ "eval_stsb_spearman": 0.5487432762945608,
644
+ "step": 7800
645
+ },
646
+ {
647
+ "epoch": 1.02,
648
+ "eval_avg_sts": 0.5742201136812924,
649
+ "eval_sickr_spearman": 0.5966417656910182,
650
+ "eval_stsb_spearman": 0.5517984616715667,
651
+ "step": 7900
652
+ },
653
+ {
654
+ "epoch": 1.04,
655
+ "learning_rate": 6.541736912635629e-06,
656
+ "loss": 0.0041,
657
+ "step": 8000
658
+ },
659
+ {
660
+ "epoch": 1.04,
661
+ "eval_avg_sts": 0.578527273351308,
662
+ "eval_sickr_spearman": 0.6026895057663952,
663
+ "eval_stsb_spearman": 0.5543650409362207,
664
+ "step": 8000
665
+ },
666
+ {
667
+ "epoch": 1.05,
668
+ "eval_avg_sts": 0.5874722846001152,
669
+ "eval_sickr_spearman": 0.6127884771236713,
670
+ "eval_stsb_spearman": 0.5621560920765593,
671
+ "step": 8100
672
+ },
673
+ {
674
+ "epoch": 1.06,
675
+ "eval_avg_sts": 0.5832344728360276,
676
+ "eval_sickr_spearman": 0.6076356044991877,
677
+ "eval_stsb_spearman": 0.5588333411728674,
678
+ "step": 8200
679
+ },
680
+ {
681
+ "epoch": 1.08,
682
+ "eval_avg_sts": 0.5855265282675808,
683
+ "eval_sickr_spearman": 0.6116178150883202,
684
+ "eval_stsb_spearman": 0.5594352414468413,
685
+ "step": 8300
686
+ },
687
+ {
688
+ "epoch": 1.09,
689
+ "eval_avg_sts": 0.5773281021616433,
690
+ "eval_sickr_spearman": 0.6021348906380893,
691
+ "eval_stsb_spearman": 0.5525213136851973,
692
+ "step": 8400
693
+ },
694
+ {
695
+ "epoch": 1.1,
696
+ "learning_rate": 6.325595469675356e-06,
697
+ "loss": 0.0034,
698
+ "step": 8500
699
+ },
700
+ {
701
+ "epoch": 1.1,
702
+ "eval_avg_sts": 0.5733092691598134,
703
+ "eval_sickr_spearman": 0.598557342078573,
704
+ "eval_stsb_spearman": 0.5480611962410536,
705
+ "step": 8500
706
+ },
707
+ {
708
+ "epoch": 1.12,
709
+ "eval_avg_sts": 0.5735029071550635,
710
+ "eval_sickr_spearman": 0.5968002683257639,
711
+ "eval_stsb_spearman": 0.5502055459843631,
712
+ "step": 8600
713
+ },
714
+ {
715
+ "epoch": 1.13,
716
+ "eval_avg_sts": 0.5851313052653937,
717
+ "eval_sickr_spearman": 0.6086310971075941,
718
+ "eval_stsb_spearman": 0.5616315134231933,
719
+ "step": 8700
720
+ },
721
+ {
722
+ "epoch": 1.14,
723
+ "eval_avg_sts": 0.5914101664574409,
724
+ "eval_sickr_spearman": 0.6088285529656062,
725
+ "eval_stsb_spearman": 0.5739917799492756,
726
+ "step": 8800
727
+ },
728
+ {
729
+ "epoch": 1.15,
730
+ "eval_avg_sts": 0.5948432200811302,
731
+ "eval_sickr_spearman": 0.6124563567562383,
732
+ "eval_stsb_spearman": 0.5772300834060221,
733
+ "step": 8900
734
+ }
735
+ ],
736
+ "max_steps": 23133,
737
+ "num_train_epochs": 3,
738
+ "total_flos": 0,
739
+ "trial_name": null,
740
+ "trial_params": null
741
+ }
model/my-unsup-simcse-bert-base-uncased-0413/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd222bfb9c26b00bdd9e473783a9c2007e0baf100da40fb19c324977569aa06b
3
+ size 2171
model/my-unsup-simcse-bert-base-uncased-0413/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
model/my-unsup-simcse-bert-base-uncased_ori/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "roberta-base",
3
+ "architectures": [
4
+ "RobertaForCL"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 2,
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "transformers_version": "4.2.1",
23
+ "type_vocab_size": 1,
24
+ "use_cache": true,
25
+ "vocab_size": 50265
26
+ }
model/my-unsup-simcse-bert-base-uncased_ori/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model/my-unsup-simcse-bert-base-uncased_ori/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d90ba75920b945193134035ade9fb71cf9af547549831752edbe17c72f0b4ff
3
+ size 997281797
model/my-unsup-simcse-bert-base-uncased_ori/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29f3ec6385de50439bcc848c54dbeffe904d2e8d9d503c1eb1ad40d49437b75b
3
+ size 498669037
model/my-unsup-simcse-bert-base-uncased_ori/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1522b9247a289f4d0d155e0bbe7375547292a9040940f809c7eb3c8cec5b56bf
3
+ size 627
model/my-unsup-simcse-bert-base-uncased_ori/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
model/my-unsup-simcse-bert-base-uncased_ori/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "model_max_length": 512, "name_or_path": "roberta-base"}
model/my-unsup-simcse-bert-base-uncased_ori/train_results.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ epoch = 3.0
2
+ train_runtime = 763.7936
3
+ train_samples_per_second = 1.261
model/my-unsup-simcse-bert-base-uncased_ori/trainer_state.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7830865408035854,
3
+ "best_model_checkpoint": "result/my-unsup-simcse-bert-base-uncased",
4
+ "epoch": 0.62402496099844,
5
+ "global_step": 400,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.16,
12
+ "eval_avg_sts": 0.7354961861232879,
13
+ "eval_sickr_spearman": 0.6918233183223756,
14
+ "eval_stsb_spearman": 0.7791690539242002,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 0.31,
19
+ "eval_avg_sts": 0.7392649185824722,
20
+ "eval_sickr_spearman": 0.7019525973046593,
21
+ "eval_stsb_spearman": 0.7765772398602853,
22
+ "step": 200
23
+ },
24
+ {
25
+ "epoch": 0.47,
26
+ "eval_avg_sts": 0.7348363506865718,
27
+ "eval_sickr_spearman": 0.6903618759989183,
28
+ "eval_stsb_spearman": 0.7793108253742252,
29
+ "step": 300
30
+ },
31
+ {
32
+ "epoch": 0.62,
33
+ "eval_avg_sts": 0.7360856589231828,
34
+ "eval_sickr_spearman": 0.6890847770427803,
35
+ "eval_stsb_spearman": 0.7830865408035854,
36
+ "step": 400
37
+ }
38
+ ],
39
+ "max_steps": 1923,
40
+ "num_train_epochs": 3,
41
+ "total_flos": 0,
42
+ "trial_name": null,
43
+ "trial_params": null
44
+ }
model/my-unsup-simcse-bert-base-uncased_ori/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aa113111f5a68f9866ca380fbcb100457fb0a5f72e4c85cd5385220044e1e55
3
+ size 2107
model/my-unsup-simcse-bert-base-uncased_ori/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
model/my-unsup-simcse-bert-base-uncased_ori/vocab.txt ADDED
The diff for this file is too large to render. See raw diff