leoner24 commited on
Commit
7b62a50
·
verified ·
1 Parent(s): 9bcbccf

Upload 8 files

Browse files
config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data/lyhe/SentenceGen/RankCSE-master/checkpoints/multicse-roberta-base-uncased",
3
+ "architectures": [
4
+ "RobertaForCL"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 2,
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "transformers_version": "4.2.1",
23
+ "type_vocab_size": 1,
24
+ "use_cache": true,
25
+ "vocab_size": 50265
26
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2eb5f5ddf6d661e96aefe7eb2f1c74b7d2364583a7e74f2b1eedd2126eeb85e
3
+ size 498669738
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "errors": "replace", "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "/data/lyhe/SentenceGen/RankCSE-master/checkpoints/multicse-roberta-base-uncased"}
trainer_state.json ADDED
@@ -0,0 +1,426 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8533187120426406,
3
+ "best_model_checkpoint": "checkpoints/longgen-soft_sort-multicse-roberta-base-uncased-lr:3e-6-es:25-dw:0.5",
4
+ "epoch": 0.08715682002116666,
5
+ "global_step": 1400,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.06,
12
+ "eval_avg_sts": 0.7933545386236226,
13
+ "eval_sickr_spearman": 0.7452889947949065,
14
+ "eval_stsb_spearman": 0.8414200824523387,
15
+ "step": 25
16
+ },
17
+ {
18
+ "epoch": 0.11,
19
+ "eval_avg_sts": 0.8246845923550631,
20
+ "eval_sickr_spearman": 0.7947764472597243,
21
+ "eval_stsb_spearman": 0.8545927374504019,
22
+ "step": 50
23
+ },
24
+ {
25
+ "epoch": 0.17,
26
+ "eval_avg_sts": 0.831372297816624,
27
+ "eval_sickr_spearman": 0.7991409854163041,
28
+ "eval_stsb_spearman": 0.863603610216944,
29
+ "step": 75
30
+ },
31
+ {
32
+ "epoch": 0.22,
33
+ "eval_avg_sts": 0.8311033533020455,
34
+ "eval_sickr_spearman": 0.7968452428608664,
35
+ "eval_stsb_spearman": 0.8653614637432246,
36
+ "step": 100
37
+ },
38
+ {
39
+ "epoch": 0.28,
40
+ "eval_avg_sts": 0.8347302190626489,
41
+ "eval_sickr_spearman": 0.8026452865461272,
42
+ "eval_stsb_spearman": 0.8668151515791707,
43
+ "step": 125
44
+ },
45
+ {
46
+ "epoch": 0.33,
47
+ "eval_avg_sts": 0.8291379711550301,
48
+ "eval_sickr_spearman": 0.7935402227709101,
49
+ "eval_stsb_spearman": 0.8647357195391501,
50
+ "step": 150
51
+ },
52
+ {
53
+ "epoch": 0.39,
54
+ "eval_avg_sts": 0.8375071181348266,
55
+ "eval_sickr_spearman": 0.8121609682075389,
56
+ "eval_stsb_spearman": 0.8628532680621143,
57
+ "step": 175
58
+ },
59
+ {
60
+ "epoch": 0.44,
61
+ "eval_avg_sts": 0.8401116350055815,
62
+ "eval_sickr_spearman": 0.8101783404023766,
63
+ "eval_stsb_spearman": 0.8700449296087865,
64
+ "step": 200
65
+ },
66
+ {
67
+ "epoch": 0.5,
68
+ "eval_avg_sts": 0.8319177480289321,
69
+ "eval_sickr_spearman": 0.7987839222082131,
70
+ "eval_stsb_spearman": 0.8650515738496511,
71
+ "step": 225
72
+ },
73
+ {
74
+ "epoch": 0.55,
75
+ "eval_avg_sts": 0.8389465429116724,
76
+ "eval_sickr_spearman": 0.810887903863922,
77
+ "eval_stsb_spearman": 0.8670051819594228,
78
+ "step": 250
79
+ },
80
+ {
81
+ "epoch": 0.61,
82
+ "eval_avg_sts": 0.8427200181062976,
83
+ "eval_sickr_spearman": 0.8177831046930728,
84
+ "eval_stsb_spearman": 0.8676569315195224,
85
+ "step": 275
86
+ },
87
+ {
88
+ "epoch": 0.67,
89
+ "eval_avg_sts": 0.8463857988633654,
90
+ "eval_sickr_spearman": 0.8234845405270812,
91
+ "eval_stsb_spearman": 0.8692870571996496,
92
+ "step": 300
93
+ },
94
+ {
95
+ "epoch": 0.72,
96
+ "eval_avg_sts": 0.8427305224744921,
97
+ "eval_sickr_spearman": 0.8178719622307333,
98
+ "eval_stsb_spearman": 0.8675890827182509,
99
+ "step": 325
100
+ },
101
+ {
102
+ "epoch": 0.78,
103
+ "eval_avg_sts": 0.8421029268773901,
104
+ "eval_sickr_spearman": 0.8149265029661426,
105
+ "eval_stsb_spearman": 0.8692793507886375,
106
+ "step": 350
107
+ },
108
+ {
109
+ "epoch": 0.83,
110
+ "eval_avg_sts": 0.8477497237237577,
111
+ "eval_sickr_spearman": 0.8272267397023274,
112
+ "eval_stsb_spearman": 0.8682727077451879,
113
+ "step": 375
114
+ },
115
+ {
116
+ "epoch": 0.89,
117
+ "eval_avg_sts": 0.8442815051792503,
118
+ "eval_sickr_spearman": 0.821835440690305,
119
+ "eval_stsb_spearman": 0.8667275696681955,
120
+ "step": 400
121
+ },
122
+ {
123
+ "epoch": 0.94,
124
+ "eval_avg_sts": 0.8421091906691427,
125
+ "eval_sickr_spearman": 0.8156059028959847,
126
+ "eval_stsb_spearman": 0.8686124784423007,
127
+ "step": 425
128
+ },
129
+ {
130
+ "epoch": 1.0,
131
+ "eval_avg_sts": 0.8394955748362896,
132
+ "eval_sickr_spearman": 0.8169585547746848,
133
+ "eval_stsb_spearman": 0.8620325948978942,
134
+ "step": 450
135
+ },
136
+ {
137
+ "epoch": 1.05,
138
+ "eval_avg_sts": 0.8465826685429587,
139
+ "eval_sickr_spearman": 0.8285129165366373,
140
+ "eval_stsb_spearman": 0.86465242054928,
141
+ "step": 475
142
+ },
143
+ {
144
+ "epoch": 1.11,
145
+ "learning_rate": 3.152254249815226e-05,
146
+ "loss": 0.4875,
147
+ "step": 500
148
+ },
149
+ {
150
+ "epoch": 1.11,
151
+ "eval_avg_sts": 0.8509943622098309,
152
+ "eval_sickr_spearman": 0.8317867164106589,
153
+ "eval_stsb_spearman": 0.8702020080090029,
154
+ "step": 500
155
+ },
156
+ {
157
+ "epoch": 1.16,
158
+ "eval_avg_sts": 0.845221120650099,
159
+ "eval_sickr_spearman": 0.8256615501997637,
160
+ "eval_stsb_spearman": 0.8647806911004344,
161
+ "step": 525
162
+ },
163
+ {
164
+ "epoch": 1.22,
165
+ "eval_avg_sts": 0.84262665612863,
166
+ "eval_sickr_spearman": 0.8214538816204806,
167
+ "eval_stsb_spearman": 0.8637994306367793,
168
+ "step": 550
169
+ },
170
+ {
171
+ "epoch": 1.27,
172
+ "eval_avg_sts": 0.8424131248137818,
173
+ "eval_sickr_spearman": 0.8200806724303651,
174
+ "eval_stsb_spearman": 0.8647455771971985,
175
+ "step": 575
176
+ },
177
+ {
178
+ "epoch": 1.33,
179
+ "eval_avg_sts": 0.8456252822224128,
180
+ "eval_sickr_spearman": 0.8223322744035808,
181
+ "eval_stsb_spearman": 0.868918290041245,
182
+ "step": 600
183
+ },
184
+ {
185
+ "epoch": 1.39,
186
+ "eval_avg_sts": 0.8418597846725664,
187
+ "eval_sickr_spearman": 0.8187211040730578,
188
+ "eval_stsb_spearman": 0.8649984652720751,
189
+ "step": 625
190
+ },
191
+ {
192
+ "epoch": 1.44,
193
+ "eval_avg_sts": 0.8491621745286901,
194
+ "eval_sickr_spearman": 0.8251584244121993,
195
+ "eval_stsb_spearman": 0.8731659246451808,
196
+ "step": 650
197
+ },
198
+ {
199
+ "epoch": 1.5,
200
+ "eval_avg_sts": 0.8441242517008596,
201
+ "eval_sickr_spearman": 0.8207689100828717,
202
+ "eval_stsb_spearman": 0.8674795933188475,
203
+ "step": 675
204
+ },
205
+ {
206
+ "epoch": 1.55,
207
+ "eval_avg_sts": 0.8486831930599381,
208
+ "eval_sickr_spearman": 0.8272354813627892,
209
+ "eval_stsb_spearman": 0.8701309047570869,
210
+ "step": 700
211
+ },
212
+ {
213
+ "epoch": 1.61,
214
+ "eval_avg_sts": 0.8461637494650611,
215
+ "eval_sickr_spearman": 0.8234650879309988,
216
+ "eval_stsb_spearman": 0.8688624109991235,
217
+ "step": 725
218
+ },
219
+ {
220
+ "epoch": 1.66,
221
+ "eval_avg_sts": 0.8407491656841108,
222
+ "eval_sickr_spearman": 0.8176846409351248,
223
+ "eval_stsb_spearman": 0.863813690433097,
224
+ "step": 750
225
+ },
226
+ {
227
+ "epoch": 1.72,
228
+ "eval_avg_sts": 0.8437663221140155,
229
+ "eval_sickr_spearman": 0.8220878441283623,
230
+ "eval_stsb_spearman": 0.8654448000996685,
231
+ "step": 775
232
+ },
233
+ {
234
+ "epoch": 1.77,
235
+ "eval_avg_sts": 0.8461783409585929,
236
+ "eval_sickr_spearman": 0.8231578810062006,
237
+ "eval_stsb_spearman": 0.8691988009109852,
238
+ "step": 800
239
+ },
240
+ {
241
+ "epoch": 1.83,
242
+ "eval_avg_sts": 0.8405187952752955,
243
+ "eval_sickr_spearman": 0.8153428845845095,
244
+ "eval_stsb_spearman": 0.8656947059660813,
245
+ "step": 825
246
+ },
247
+ {
248
+ "epoch": 1.88,
249
+ "eval_avg_sts": 0.8427375515517908,
250
+ "eval_sickr_spearman": 0.8192698113758867,
251
+ "eval_stsb_spearman": 0.8662052917276947,
252
+ "step": 850
253
+ },
254
+ {
255
+ "epoch": 1.94,
256
+ "eval_avg_sts": 0.8433561035217969,
257
+ "eval_sickr_spearman": 0.8219217045484879,
258
+ "eval_stsb_spearman": 0.8647905024951058,
259
+ "step": 875
260
+ },
261
+ {
262
+ "epoch": 2.0,
263
+ "eval_avg_sts": 0.8422742315982205,
264
+ "eval_sickr_spearman": 0.8198740426319783,
265
+ "eval_stsb_spearman": 0.8646744205644626,
266
+ "step": 900
267
+ },
268
+ {
269
+ "epoch": 2.05,
270
+ "eval_avg_sts": 0.8450157787124789,
271
+ "eval_sickr_spearman": 0.8220855386354934,
272
+ "eval_stsb_spearman": 0.8679460187894644,
273
+ "step": 925
274
+ },
275
+ {
276
+ "epoch": 2.11,
277
+ "eval_avg_sts": 0.8433056158731305,
278
+ "eval_sickr_spearman": 0.8200347066662887,
279
+ "eval_stsb_spearman": 0.8665765250799722,
280
+ "step": 950
281
+ },
282
+ {
283
+ "epoch": 2.16,
284
+ "eval_avg_sts": 0.8406107657907074,
285
+ "eval_sickr_spearman": 0.8160642156659073,
286
+ "eval_stsb_spearman": 0.8651573159155074,
287
+ "step": 975
288
+ },
289
+ {
290
+ "epoch": 2.22,
291
+ "learning_rate": 1.3045084996304511e-05,
292
+ "loss": 0.2406,
293
+ "step": 1000
294
+ },
295
+ {
296
+ "epoch": 2.22,
297
+ "eval_avg_sts": 0.8441566677812831,
298
+ "eval_sickr_spearman": 0.819970921363579,
299
+ "eval_stsb_spearman": 0.8683424141989871,
300
+ "step": 1000
301
+ },
302
+ {
303
+ "epoch": 2.27,
304
+ "eval_avg_sts": 0.843133071876425,
305
+ "eval_sickr_spearman": 0.8194127519337666,
306
+ "eval_stsb_spearman": 0.8668533918190835,
307
+ "step": 1025
308
+ },
309
+ {
310
+ "epoch": 2.33,
311
+ "eval_avg_sts": 0.8427876550387465,
312
+ "eval_sickr_spearman": 0.818166056664839,
313
+ "eval_stsb_spearman": 0.8674092534126542,
314
+ "step": 1050
315
+ },
316
+ {
317
+ "epoch": 2.38,
318
+ "eval_avg_sts": 0.8430375455152233,
319
+ "eval_sickr_spearman": 0.8175366090804925,
320
+ "eval_stsb_spearman": 0.868538481949954,
321
+ "step": 1075
322
+ },
323
+ {
324
+ "epoch": 2.44,
325
+ "eval_avg_sts": 0.8419815162026707,
326
+ "eval_sickr_spearman": 0.8154516750292669,
327
+ "eval_stsb_spearman": 0.8685113573760747,
328
+ "step": 1100
329
+ },
330
+ {
331
+ "epoch": 2.49,
332
+ "eval_avg_sts": 0.8439982682941662,
333
+ "eval_sickr_spearman": 0.818244203266879,
334
+ "eval_stsb_spearman": 0.8697523333214534,
335
+ "step": 1125
336
+ },
337
+ {
338
+ "epoch": 2.55,
339
+ "eval_avg_sts": 0.8455002294336829,
340
+ "eval_sickr_spearman": 0.8215922111926224,
341
+ "eval_stsb_spearman": 0.8694082476747435,
342
+ "step": 1150
343
+ },
344
+ {
345
+ "epoch": 2.61,
346
+ "eval_avg_sts": 0.8415438728640847,
347
+ "eval_sickr_spearman": 0.8160743021972091,
348
+ "eval_stsb_spearman": 0.8670134435309604,
349
+ "step": 1175
350
+ },
351
+ {
352
+ "epoch": 2.66,
353
+ "eval_avg_sts": 0.8430556628773995,
354
+ "eval_sickr_spearman": 0.8172068275380184,
355
+ "eval_stsb_spearman": 0.8689044982167805,
356
+ "step": 1200
357
+ },
358
+ {
359
+ "epoch": 2.72,
360
+ "eval_avg_sts": 0.8426755323560923,
361
+ "eval_sickr_spearman": 0.8163431803030597,
362
+ "eval_stsb_spearman": 0.8690078844091248,
363
+ "step": 1225
364
+ },
365
+ {
366
+ "epoch": 2.77,
367
+ "eval_avg_sts": 0.8432975917391303,
368
+ "eval_sickr_spearman": 0.8183610149055763,
369
+ "eval_stsb_spearman": 0.8682341685726842,
370
+ "step": 1250
371
+ },
372
+ {
373
+ "epoch": 2.83,
374
+ "eval_avg_sts": 0.8435685574000784,
375
+ "eval_sickr_spearman": 0.8189177914334469,
376
+ "eval_stsb_spearman": 0.8682193233667098,
377
+ "step": 1275
378
+ },
379
+ {
380
+ "epoch": 2.88,
381
+ "eval_avg_sts": 0.8441294339312184,
382
+ "eval_sickr_spearman": 0.8195803804777856,
383
+ "eval_stsb_spearman": 0.8686784873846513,
384
+ "step": 1300
385
+ },
386
+ {
387
+ "epoch": 2.94,
388
+ "eval_avg_sts": 0.8438416918303956,
389
+ "eval_sickr_spearman": 0.8193592933178659,
390
+ "eval_stsb_spearman": 0.8683240903429252,
391
+ "step": 1325
392
+ },
393
+ {
394
+ "epoch": 2.99,
395
+ "eval_avg_sts": 0.8441351416286236,
396
+ "eval_sickr_spearman": 0.8197974330251845,
397
+ "eval_stsb_spearman": 0.8684728502320628,
398
+ "step": 1350
399
+ },
400
+ {
401
+ "epoch": 3.0,
402
+ "step": 1353,
403
+ "train_runtime": 2044.9965,
404
+ "train_samples_per_second": 0.662
405
+ },
406
+ {
407
+ "epoch": 0.09,
408
+ "eval_avg_sts": 0.8519068652662883,
409
+ "eval_sickr_spearman": 0.8324423889763906,
410
+ "eval_stsb_spearman": 0.8713713415561861,
411
+ "step": 1375
412
+ },
413
+ {
414
+ "epoch": 0.09,
415
+ "eval_avg_sts": 0.8533187120426406,
416
+ "eval_sickr_spearman": 0.8342147846505584,
417
+ "eval_stsb_spearman": 0.8724226394347229,
418
+ "step": 1400
419
+ }
420
+ ],
421
+ "max_steps": 8032,
422
+ "num_train_epochs": 1,
423
+ "total_flos": 130663234395930240,
424
+ "trial_name": null,
425
+ "trial_params": null
426
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:852624934ea43863ac502b42d2f8edc7fc0ad7f1eab4a8a9103468c6222ec370
3
+ size 2744
vocab.json ADDED
The diff for this file is too large to render. See raw diff