Cathy committed on
Commit
29fc92c
·
1 Parent(s): b38ec0b

Add model weight and config

Browse files
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.8623978495597839,
4
+ "eval_loss": 0.6752045750617981,
5
+ "eval_runtime": 8.1962,
6
+ "eval_samples": 734,
7
+ "eval_samples_per_second": 89.554,
8
+ "eval_steps_per_second": 44.777,
9
+ "train_loss": 0.18581116994222005,
10
+ "train_runtime": 2514.2374,
11
+ "train_samples": 2889,
12
+ "train_samples_per_second": 22.981,
13
+ "train_steps_per_second": 1.432
14
+ }
config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "roberta-large-mnli",
3
+ "_num_labels": 3,
4
+ "architectures": [
5
+ "RobertaForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "bos_token_id": 0,
9
+ "classifier_dropout": null,
10
+ "eos_token_id": 2,
11
+ "gradient_checkpointing": false,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 1024,
15
+ "id2label": {
16
+ "0": 0,
17
+ "1": 1,
18
+ "2": 2
19
+ },
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 4096,
22
+ "label2id": {
23
+ "0": 0,
24
+ "1": 1,
25
+ "2": 2
26
+ },
27
+ "layer_norm_eps": 1e-05,
28
+ "max_position_embeddings": 514,
29
+ "model_type": "roberta",
30
+ "num_attention_heads": 16,
31
+ "num_hidden_layers": 24,
32
+ "pad_token_id": 1,
33
+ "position_embedding_type": "absolute",
34
+ "problem_type": "single_label_classification",
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.10.0.dev0",
37
+ "type_vocab_size": 1,
38
+ "use_cache": true,
39
+ "vocab_size": 50265
40
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.8623978495597839,
4
+ "eval_loss": 0.6752045750617981,
5
+ "eval_runtime": 8.1962,
6
+ "eval_samples": 734,
7
+ "eval_samples_per_second": 89.554,
8
+ "eval_steps_per_second": 44.777
9
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
predict_results_None.txt ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ index prediction
2
+ 0 1
3
+ 1 1
4
+ 2 0
5
+ 3 1
6
+ 4 0
7
+ 5 1
8
+ 6 1
9
+ 7 0
10
+ 8 1
11
+ 9 1
12
+ 10 1
13
+ 11 1
14
+ 12 1
15
+ 13 1
16
+ 14 0
17
+ 15 1
18
+ 16 0
19
+ 17 1
20
+ 18 2
21
+ 19 1
22
+ 20 0
23
+ 21 1
24
+ 22 2
25
+ 23 1
26
+ 24 0
27
+ 25 1
28
+ 26 2
29
+ 27 0
30
+ 28 0
31
+ 29 0
32
+ 30 1
33
+ 31 0
34
+ 32 0
35
+ 33 0
36
+ 34 0
37
+ 35 2
38
+ 36 0
39
+ 37 2
40
+ 38 0
41
+ 39 1
42
+ 40 0
43
+ 41 0
44
+ 42 0
45
+ 43 0
46
+ 44 0
47
+ 45 1
48
+ 46 2
49
+ 47 2
50
+ 48 2
51
+ 49 1
52
+ 50 0
53
+ 51 0
54
+ 52 0
55
+ 53 0
56
+ 54 1
57
+ 55 0
58
+ 56 1
59
+ 57 0
60
+ 58 1
61
+ 59 0
62
+ 60 1
63
+ 61 1
64
+ 62 1
65
+ 63 0
66
+ 64 0
67
+ 65 0
68
+ 66 0
69
+ 67 1
70
+ 68 2
71
+ 69 0
72
+ 70 0
73
+ 71 1
74
+ 72 0
75
+ 73 1
76
+ 74 1
77
+ 75 1
78
+ 76 1
79
+ 77 1
80
+ 78 1
81
+ 79 1
82
+ 80 1
83
+ 81 1
84
+ 82 0
85
+ 83 1
86
+ 84 0
87
+ 85 1
88
+ 86 0
89
+ 87 1
90
+ 88 1
91
+ 89 1
92
+ 90 0
93
+ 91 1
94
+ 92 1
95
+ 93 1
96
+ 94 1
97
+ 95 1
98
+ 96 1
99
+ 97 1
100
+ 98 1
101
+ 99 1
102
+ 100 1
103
+ 101 1
104
+ 102 0
105
+ 103 1
106
+ 104 2
107
+ 105 0
108
+ 106 0
109
+ 107 0
110
+ 108 0
111
+ 109 0
112
+ 110 0
113
+ 111 1
114
+ 112 0
115
+ 113 1
116
+ 114 1
117
+ 115 1
118
+ 116 1
119
+ 117 1
120
+ 118 1
121
+ 119 1
122
+ 120 1
123
+ 121 1
124
+ 122 0
125
+ 123 0
126
+ 124 0
127
+ 125 2
128
+ 126 2
129
+ 127 0
130
+ 128 2
131
+ 129 0
132
+ 130 1
133
+ 131 1
134
+ 132 1
135
+ 133 0
136
+ 134 1
137
+ 135 1
138
+ 136 1
139
+ 137 2
140
+ 138 2
141
+ 139 2
142
+ 140 1
143
+ 141 1
144
+ 142 2
145
+ 143 2
146
+ 144 1
147
+ 145 1
148
+ 146 1
149
+ 147 1
150
+ 148 0
151
+ 149 1
152
+ 150 0
153
+ 151 1
154
+ 152 1
155
+ 153 0
156
+ 154 0
157
+ 155 1
158
+ 156 2
159
+ 157 2
160
+ 158 1
161
+ 159 2
162
+ 160 1
163
+ 161 2
164
+ 162 1
165
+ 163 2
166
+ 164 2
167
+ 165 0
168
+ 166 0
169
+ 167 1
170
+ 168 0
171
+ 169 1
172
+ 170 0
173
+ 171 1
174
+ 172 1
175
+ 173 1
176
+ 174 1
177
+ 175 0
178
+ 176 1
179
+ 177 2
180
+ 178 1
181
+ 179 0
182
+ 180 0
183
+ 181 2
184
+ 182 1
185
+ 183 1
186
+ 184 2
187
+ 185 1
188
+ 186 1
189
+ 187 1
190
+ 188 0
191
+ 189 1
192
+ 190 0
193
+ 191 1
194
+ 192 0
195
+ 193 1
196
+ 194 1
197
+ 195 1
198
+ 196 1
199
+ 197 1
200
+ 198 0
201
+ 199 0
202
+ 200 0
203
+ 201 1
204
+ 202 1
205
+ 203 0
206
+ 204 0
207
+ 205 1
208
+ 206 0
209
+ 207 0
210
+ 208 1
211
+ 209 0
212
+ 210 1
213
+ 211 2
214
+ 212 1
215
+ 213 1
216
+ 214 1
217
+ 215 1
218
+ 216 0
219
+ 217 1
220
+ 218 0
221
+ 219 0
222
+ 220 0
223
+ 221 0
224
+ 222 0
225
+ 223 1
226
+ 224 0
227
+ 225 0
228
+ 226 0
229
+ 227 1
230
+ 228 0
231
+ 229 0
232
+ 230 1
233
+ 231 0
234
+ 232 0
235
+ 233 2
236
+ 234 0
237
+ 235 0
238
+ 236 1
239
+ 237 0
240
+ 238 0
241
+ 239 1
242
+ 240 0
243
+ 241 1
244
+ 242 1
245
+ 243 1
246
+ 244 1
247
+ 245 1
248
+ 246 1
249
+ 247 1
250
+ 248 1
251
+ 249 1
252
+ 250 1
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ae5760c2cd564499419f91c868033eaf11ec16a7a712180f9325e880dcc83e1
3
+ size 1421624777
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "roberta-large-mnli", "tokenizer_class": "RobertaTokenizer"}
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "train_loss": 0.18581116994222005,
4
+ "train_runtime": 2514.2374,
5
+ "train_samples": 2889,
6
+ "train_samples_per_second": 22.981,
7
+ "train_steps_per_second": 1.432
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 19.996539792387544,
5
+ "global_step": 3600,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_accuracy": 0.8460490703582764,
13
+ "eval_loss": 0.5365713834762573,
14
+ "eval_runtime": 8.0333,
15
+ "eval_samples_per_second": 91.369,
16
+ "eval_steps_per_second": 45.685,
17
+ "step": 180
18
+ },
19
+ {
20
+ "epoch": 2.0,
21
+ "eval_accuracy": 0.8092643022537231,
22
+ "eval_loss": 0.5189609527587891,
23
+ "eval_runtime": 8.0124,
24
+ "eval_samples_per_second": 91.608,
25
+ "eval_steps_per_second": 45.804,
26
+ "step": 360
27
+ },
28
+ {
29
+ "epoch": 2.78,
30
+ "learning_rate": 4.309722222222222e-05,
31
+ "loss": 0.4021,
32
+ "step": 500
33
+ },
34
+ {
35
+ "epoch": 3.0,
36
+ "eval_accuracy": 0.8283378481864929,
37
+ "eval_loss": 0.6708077788352966,
38
+ "eval_runtime": 8.0551,
39
+ "eval_samples_per_second": 91.122,
40
+ "eval_steps_per_second": 45.561,
41
+ "step": 540
42
+ },
43
+ {
44
+ "epoch": 4.0,
45
+ "eval_accuracy": 0.8542234301567078,
46
+ "eval_loss": 0.516476571559906,
47
+ "eval_runtime": 8.0611,
48
+ "eval_samples_per_second": 91.054,
49
+ "eval_steps_per_second": 45.527,
50
+ "step": 720
51
+ },
52
+ {
53
+ "epoch": 5.0,
54
+ "eval_accuracy": 0.8188011050224304,
55
+ "eval_loss": 0.6029361486434937,
56
+ "eval_runtime": 8.0681,
57
+ "eval_samples_per_second": 90.975,
58
+ "eval_steps_per_second": 45.488,
59
+ "step": 900
60
+ },
61
+ {
62
+ "epoch": 5.55,
63
+ "learning_rate": 3.6166666666666674e-05,
64
+ "loss": 0.2576,
65
+ "step": 1000
66
+ },
67
+ {
68
+ "epoch": 6.0,
69
+ "eval_accuracy": 0.8487738370895386,
70
+ "eval_loss": 0.6060934066772461,
71
+ "eval_runtime": 8.0522,
72
+ "eval_samples_per_second": 91.155,
73
+ "eval_steps_per_second": 45.578,
74
+ "step": 1080
75
+ },
76
+ {
77
+ "epoch": 7.0,
78
+ "eval_accuracy": 0.8514986634254456,
79
+ "eval_loss": 0.748849630355835,
80
+ "eval_runtime": 8.0692,
81
+ "eval_samples_per_second": 90.963,
82
+ "eval_steps_per_second": 45.481,
83
+ "step": 1260
84
+ },
85
+ {
86
+ "epoch": 8.0,
87
+ "eval_accuracy": 0.8651226162910461,
88
+ "eval_loss": 0.6119422912597656,
89
+ "eval_runtime": 8.0635,
90
+ "eval_samples_per_second": 91.028,
91
+ "eval_steps_per_second": 45.514,
92
+ "step": 1440
93
+ },
94
+ {
95
+ "epoch": 8.33,
96
+ "learning_rate": 2.9236111111111115e-05,
97
+ "loss": 0.1738,
98
+ "step": 1500
99
+ },
100
+ {
101
+ "epoch": 9.0,
102
+ "eval_accuracy": 0.8542234301567078,
103
+ "eval_loss": 0.6864181160926819,
104
+ "eval_runtime": 8.212,
105
+ "eval_samples_per_second": 89.382,
106
+ "eval_steps_per_second": 44.691,
107
+ "step": 1620
108
+ },
109
+ {
110
+ "epoch": 10.0,
111
+ "eval_accuracy": 0.8446866273880005,
112
+ "eval_loss": 0.7817405462265015,
113
+ "eval_runtime": 8.0215,
114
+ "eval_samples_per_second": 91.505,
115
+ "eval_steps_per_second": 45.752,
116
+ "step": 1800
117
+ },
118
+ {
119
+ "epoch": 11.0,
120
+ "eval_accuracy": 0.8514986634254456,
121
+ "eval_loss": 0.6188392043113708,
122
+ "eval_runtime": 8.0857,
123
+ "eval_samples_per_second": 90.777,
124
+ "eval_steps_per_second": 45.389,
125
+ "step": 1980
126
+ },
127
+ {
128
+ "epoch": 11.11,
129
+ "learning_rate": 2.2305555555555556e-05,
130
+ "loss": 0.1303,
131
+ "step": 2000
132
+ },
133
+ {
134
+ "epoch": 12.0,
135
+ "eval_accuracy": 0.8569482564926147,
136
+ "eval_loss": 0.5936163663864136,
137
+ "eval_runtime": 8.0618,
138
+ "eval_samples_per_second": 91.047,
139
+ "eval_steps_per_second": 45.523,
140
+ "step": 2160
141
+ },
142
+ {
143
+ "epoch": 13.0,
144
+ "eval_accuracy": 0.859673023223877,
145
+ "eval_loss": 0.6109394431114197,
146
+ "eval_runtime": 8.0512,
147
+ "eval_samples_per_second": 91.167,
148
+ "eval_steps_per_second": 45.583,
149
+ "step": 2340
150
+ },
151
+ {
152
+ "epoch": 13.89,
153
+ "learning_rate": 1.5375e-05,
154
+ "loss": 0.1226,
155
+ "step": 2500
156
+ },
157
+ {
158
+ "epoch": 14.0,
159
+ "eval_accuracy": 0.8501362204551697,
160
+ "eval_loss": 0.7600889205932617,
161
+ "eval_runtime": 8.0176,
162
+ "eval_samples_per_second": 91.549,
163
+ "eval_steps_per_second": 45.774,
164
+ "step": 2520
165
+ },
166
+ {
167
+ "epoch": 15.0,
168
+ "eval_accuracy": 0.8501362204551697,
169
+ "eval_loss": 0.6596993803977966,
170
+ "eval_runtime": 8.0565,
171
+ "eval_samples_per_second": 91.107,
172
+ "eval_steps_per_second": 45.553,
173
+ "step": 2700
174
+ },
175
+ {
176
+ "epoch": 16.0,
177
+ "eval_accuracy": 0.8460490703582764,
178
+ "eval_loss": 0.712175190448761,
179
+ "eval_runtime": 8.0456,
180
+ "eval_samples_per_second": 91.23,
181
+ "eval_steps_per_second": 45.615,
182
+ "step": 2880
183
+ },
184
+ {
185
+ "epoch": 16.66,
186
+ "learning_rate": 8.430555555555556e-06,
187
+ "loss": 0.1261,
188
+ "step": 3000
189
+ },
190
+ {
191
+ "epoch": 17.0,
192
+ "eval_accuracy": 0.8514986634254456,
193
+ "eval_loss": 0.7294248938560486,
194
+ "eval_runtime": 8.0369,
195
+ "eval_samples_per_second": 91.329,
196
+ "eval_steps_per_second": 45.664,
197
+ "step": 3060
198
+ },
199
+ {
200
+ "epoch": 18.0,
201
+ "eval_accuracy": 0.863760232925415,
202
+ "eval_loss": 0.6875426173210144,
203
+ "eval_runtime": 8.0764,
204
+ "eval_samples_per_second": 90.882,
205
+ "eval_steps_per_second": 45.441,
206
+ "step": 3240
207
+ },
208
+ {
209
+ "epoch": 19.0,
210
+ "eval_accuracy": 0.8664849996566772,
211
+ "eval_loss": 0.6823601126670837,
212
+ "eval_runtime": 8.0997,
213
+ "eval_samples_per_second": 90.62,
214
+ "eval_steps_per_second": 45.31,
215
+ "step": 3420
216
+ },
217
+ {
218
+ "epoch": 19.44,
219
+ "learning_rate": 1.4861111111111113e-06,
220
+ "loss": 0.1044,
221
+ "step": 3500
222
+ },
223
+ {
224
+ "epoch": 20.0,
225
+ "eval_accuracy": 0.8623978495597839,
226
+ "eval_loss": 0.6752045750617981,
227
+ "eval_runtime": 8.0488,
228
+ "eval_samples_per_second": 91.193,
229
+ "eval_steps_per_second": 45.597,
230
+ "step": 3600
231
+ },
232
+ {
233
+ "epoch": 20.0,
234
+ "step": 3600,
235
+ "total_flos": 1.3459697175036672e+16,
236
+ "train_loss": 0.18581116994222005,
237
+ "train_runtime": 2514.2374,
238
+ "train_samples_per_second": 22.981,
239
+ "train_steps_per_second": 1.432
240
+ }
241
+ ],
242
+ "max_steps": 3600,
243
+ "num_train_epochs": 20,
244
+ "total_flos": 1.3459697175036672e+16,
245
+ "trial_name": null,
246
+ "trial_params": null
247
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5863e266a34ca57f81dba3f859ac9e02d43fd6d5c3b46b3d8f7b926485dd16a8
3
+ size 2671
vocab.json ADDED
The diff for this file is too large to render. See raw diff