KarelDO committed on
Commit
059f553
·
1 Parent(s): 633d53e

commit files to HF hub

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"[noise-Negative]": 50273, "[food-Positive]": 50266, "[food-unknown]": 50267, "[food-Negative]": 50265, "[ambiance-no majority]": 50272, "[food-no majority]": 50268, "[ambiance-unknown]": 50271, "[service-no majority]": 50280, "[ambiance-Positive]": 50270, "[noise-no majority]": 50276, "[noise-Positive]": 50274, "[ambiance-Negative]": 50269, "[service-unknown]": 50279, "[service-Positive]": 50278, "[service-Negative]": 50277, "[noise-unknown]": 50275}
config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "CEBaB/roberta-base.CEBaB.sa.5-class.exclusive.seed_77",
3
+ "architectures": [
4
+ "RobertaForFactualCounterfactualSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "finetuning_task": "opentable",
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": 0,
16
+ "1": 1,
17
+ "2": 2,
18
+ "3": 3,
19
+ "4": 4
20
+ },
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 3072,
23
+ "label2id": {
24
+ "0": 0,
25
+ "1": 1,
26
+ "2": 2,
27
+ "3": 3,
28
+ "4": 4
29
+ },
30
+ "layer_norm_eps": 1e-05,
31
+ "max_position_embeddings": 514,
32
+ "model_type": "roberta",
33
+ "num_attention_heads": 12,
34
+ "num_hidden_layers": 12,
35
+ "pad_token_id": 1,
36
+ "position_embedding_type": "absolute",
37
+ "problem_type": "single_label_classification",
38
+ "torch_dtype": "float32",
39
+ "transformers_version": "4.18.0",
40
+ "type_vocab_size": 1,
41
+ "use_cache": true,
42
+ "vocab_size": 50281
43
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21117e659614a2d214d65fc3d3b13d8e1bbae260fa63b41fcfbd2aac1405cdcd
3
+ size 997410589
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edcb5d74319d87f2c70b5bc0b4e888269af981b85b270dc57065810c4522393e
3
+ size 498718701
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0798213e7495550bec47fde7dbd22cf8a0b3dce6d71f10d1939fe385776d8557
3
+ size 15523
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed0cd99a63f5bdba91ff272dbf57c5399fbbc30c2343f83e10b20c4185b2a019
3
+ size 623
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["[food-Negative]", "[food-Positive]", "[food-unknown]", "[food-no majority]", "[ambiance-Negative]", "[ambiance-Positive]", "[ambiance-unknown]", "[ambiance-no majority]", "[noise-Negative]", "[noise-Positive]", "[noise-unknown]", "[noise-no majority]", "[service-Negative]", "[service-Positive]", "[service-unknown]", "[service-no majority]"]}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"errors": "replace", "bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>", "add_prefix_space": false, "trim_offsets": true, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "roberta-base", "tokenizer_class": "RobertaTokenizer"}
trainer_state.json ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.45347079634666443,
3
+ "best_model_checkpoint": "model_output/e2e_opentable_5_way__approximate__0-shot__seed-77__roberta-base/checkpoint-500",
4
+ "epoch": 0.8576329331046312,
5
+ "global_step": 500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.03,
12
+ "learning_rate": 4.9783362218370885e-05,
13
+ "loss": 16.4408,
14
+ "step": 20
15
+ },
16
+ {
17
+ "epoch": 0.07,
18
+ "learning_rate": 4.956672443674177e-05,
19
+ "loss": 13.3709,
20
+ "step": 40
21
+ },
22
+ {
23
+ "epoch": 0.09,
24
+ "eval_distillation_accuracy_counterfactual": 0.3304258594150847,
25
+ "eval_distillation_accuracy_factual": 0.85351462288353,
26
+ "eval_distillation_f1_counterfactual": 0.243675431005225,
27
+ "eval_distillation_f1_factual": 0.8405624277794843,
28
+ "eval_groundtruth_accuracy_counterfactual": 0.3258081067213956,
29
+ "eval_groundtruth_f1_counterfactual": 0.24573702955510618,
30
+ "eval_groundtruth_f1_factual": 0.7133962491941664,
31
+ "eval_icace_cosine": 0.5112652778625488,
32
+ "eval_icace_l2": 0.8116109371185303,
33
+ "eval_icace_normdiff": 0.5299729108810425,
34
+ "eval_loss": 10.612378120422363,
35
+ "eval_runtime": 12.8754,
36
+ "eval_samples_per_second": 302.748,
37
+ "eval_steps_per_second": 2.408,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.1,
42
+ "learning_rate": 4.935008665511265e-05,
43
+ "loss": 11.6048,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.14,
48
+ "learning_rate": 4.913344887348354e-05,
49
+ "loss": 11.1238,
50
+ "step": 80
51
+ },
52
+ {
53
+ "epoch": 0.17,
54
+ "learning_rate": 4.891681109185442e-05,
55
+ "loss": 10.5122,
56
+ "step": 100
57
+ },
58
+ {
59
+ "epoch": 0.17,
60
+ "eval_distillation_accuracy_counterfactual": 0.5010261672652643,
61
+ "eval_distillation_accuracy_factual": 0.8737814263724987,
62
+ "eval_distillation_f1_counterfactual": 0.3809463454212182,
63
+ "eval_distillation_f1_factual": 0.8520946306923461,
64
+ "eval_groundtruth_accuracy_counterfactual": 0.47998973832734737,
65
+ "eval_groundtruth_f1_counterfactual": 0.36220047443175507,
66
+ "eval_groundtruth_f1_factual": 0.7127756432715844,
67
+ "eval_icace_cosine": 0.46907880902290344,
68
+ "eval_icace_l2": 0.6739468574523926,
69
+ "eval_icace_normdiff": 0.44848179817199707,
70
+ "eval_loss": 8.415631294250488,
71
+ "eval_runtime": 12.9894,
72
+ "eval_samples_per_second": 300.091,
73
+ "eval_steps_per_second": 2.387,
74
+ "step": 100
75
+ },
76
+ {
77
+ "epoch": 0.21,
78
+ "learning_rate": 4.8700173310225307e-05,
79
+ "loss": 10.4539,
80
+ "step": 120
81
+ },
82
+ {
83
+ "epoch": 0.24,
84
+ "learning_rate": 4.848353552859619e-05,
85
+ "loss": 10.4598,
86
+ "step": 140
87
+ },
88
+ {
89
+ "epoch": 0.26,
90
+ "eval_distillation_accuracy_counterfactual": 0.5100051308363264,
91
+ "eval_distillation_accuracy_factual": 0.8648024628014367,
92
+ "eval_distillation_f1_counterfactual": 0.4378381593277977,
93
+ "eval_distillation_f1_factual": 0.8505368829186162,
94
+ "eval_groundtruth_accuracy_counterfactual": 0.4979476654694715,
95
+ "eval_groundtruth_f1_counterfactual": 0.42720537829821525,
96
+ "eval_groundtruth_f1_factual": 0.6962013496902839,
97
+ "eval_icace_cosine": 0.4693450629711151,
98
+ "eval_icace_l2": 0.6681450009346008,
99
+ "eval_icace_normdiff": 0.41851383447647095,
100
+ "eval_loss": 8.433810234069824,
101
+ "eval_runtime": 12.8826,
102
+ "eval_samples_per_second": 302.578,
103
+ "eval_steps_per_second": 2.406,
104
+ "step": 150
105
+ },
106
+ {
107
+ "epoch": 0.27,
108
+ "learning_rate": 4.826689774696707e-05,
109
+ "loss": 10.219,
110
+ "step": 160
111
+ },
112
+ {
113
+ "epoch": 0.31,
114
+ "learning_rate": 4.8050259965337955e-05,
115
+ "loss": 10.1164,
116
+ "step": 180
117
+ },
118
+ {
119
+ "epoch": 0.34,
120
+ "learning_rate": 4.7833622183708845e-05,
121
+ "loss": 10.1494,
122
+ "step": 200
123
+ },
124
+ {
125
+ "epoch": 0.34,
126
+ "eval_distillation_accuracy_counterfactual": 0.48691636736788096,
127
+ "eval_distillation_accuracy_factual": 0.8858388917393535,
128
+ "eval_distillation_f1_counterfactual": 0.3500647816598059,
129
+ "eval_distillation_f1_factual": 0.8801680477409537,
130
+ "eval_groundtruth_accuracy_counterfactual": 0.46793227296049256,
131
+ "eval_groundtruth_f1_counterfactual": 0.3413720392476469,
132
+ "eval_groundtruth_f1_factual": 0.696679629826118,
133
+ "eval_icace_cosine": 0.4742945730686188,
134
+ "eval_icace_l2": 0.6967979073524475,
135
+ "eval_icace_normdiff": 0.4476469159126282,
136
+ "eval_loss": 8.772797584533691,
137
+ "eval_runtime": 13.0849,
138
+ "eval_samples_per_second": 297.901,
139
+ "eval_steps_per_second": 2.369,
140
+ "step": 200
141
+ },
142
+ {
143
+ "epoch": 0.38,
144
+ "learning_rate": 4.761698440207972e-05,
145
+ "loss": 9.9977,
146
+ "step": 220
147
+ },
148
+ {
149
+ "epoch": 0.41,
150
+ "learning_rate": 4.740034662045061e-05,
151
+ "loss": 10.0073,
152
+ "step": 240
153
+ },
154
+ {
155
+ "epoch": 0.43,
156
+ "eval_distillation_accuracy_counterfactual": 0.43560800410466904,
157
+ "eval_distillation_accuracy_factual": 0.8519753719856337,
158
+ "eval_distillation_f1_counterfactual": 0.35530824798064053,
159
+ "eval_distillation_f1_factual": 0.8365066372023451,
160
+ "eval_groundtruth_accuracy_counterfactual": 0.43766033863519754,
161
+ "eval_groundtruth_f1_counterfactual": 0.364318999243899,
162
+ "eval_groundtruth_f1_factual": 0.6915401170991118,
163
+ "eval_icace_cosine": 0.48830506205558777,
164
+ "eval_icace_l2": 0.7442983388900757,
165
+ "eval_icace_normdiff": 0.4745895266532898,
166
+ "eval_loss": 9.400979042053223,
167
+ "eval_runtime": 12.983,
168
+ "eval_samples_per_second": 300.24,
169
+ "eval_steps_per_second": 2.388,
170
+ "step": 250
171
+ },
172
+ {
173
+ "epoch": 0.45,
174
+ "learning_rate": 4.7183708838821494e-05,
175
+ "loss": 9.7738,
176
+ "step": 260
177
+ },
178
+ {
179
+ "epoch": 0.48,
180
+ "learning_rate": 4.6967071057192376e-05,
181
+ "loss": 9.9329,
182
+ "step": 280
183
+ },
184
+ {
185
+ "epoch": 0.51,
186
+ "learning_rate": 4.675043327556326e-05,
187
+ "loss": 9.7779,
188
+ "step": 300
189
+ },
190
+ {
191
+ "epoch": 0.51,
192
+ "eval_distillation_accuracy_counterfactual": 0.5359158542842484,
193
+ "eval_distillation_accuracy_factual": 0.8619805028219599,
194
+ "eval_distillation_f1_counterfactual": 0.47616723466717426,
195
+ "eval_distillation_f1_factual": 0.852650039087773,
196
+ "eval_groundtruth_accuracy_counterfactual": 0.5010261672652643,
197
+ "eval_groundtruth_f1_counterfactual": 0.44417532302390744,
198
+ "eval_groundtruth_f1_factual": 0.7073800556776944,
199
+ "eval_icace_cosine": 0.46865805983543396,
200
+ "eval_icace_l2": 0.6562190651893616,
201
+ "eval_icace_normdiff": 0.4228982925415039,
202
+ "eval_loss": 8.363639831542969,
203
+ "eval_runtime": 13.0389,
204
+ "eval_samples_per_second": 298.951,
205
+ "eval_steps_per_second": 2.377,
206
+ "step": 300
207
+ },
208
+ {
209
+ "epoch": 0.55,
210
+ "learning_rate": 4.653379549393415e-05,
211
+ "loss": 9.6834,
212
+ "step": 320
213
+ },
214
+ {
215
+ "epoch": 0.58,
216
+ "learning_rate": 4.6317157712305025e-05,
217
+ "loss": 9.547,
218
+ "step": 340
219
+ },
220
+ {
221
+ "epoch": 0.6,
222
+ "eval_distillation_accuracy_counterfactual": 0.5025654181631606,
223
+ "eval_distillation_accuracy_factual": 0.8855823499230374,
224
+ "eval_distillation_f1_counterfactual": 0.4398602753096072,
225
+ "eval_distillation_f1_factual": 0.8717503260210726,
226
+ "eval_groundtruth_accuracy_counterfactual": 0.4989738327347358,
227
+ "eval_groundtruth_f1_counterfactual": 0.44441757184034925,
228
+ "eval_groundtruth_f1_factual": 0.7229789758781793,
229
+ "eval_icace_cosine": 0.4650568664073944,
230
+ "eval_icace_l2": 0.6811135411262512,
231
+ "eval_icace_normdiff": 0.4423196017742157,
232
+ "eval_loss": 8.341341972351074,
233
+ "eval_runtime": 13.1584,
234
+ "eval_samples_per_second": 296.238,
235
+ "eval_steps_per_second": 2.356,
236
+ "step": 350
237
+ },
238
+ {
239
+ "epoch": 0.62,
240
+ "learning_rate": 4.6100519930675915e-05,
241
+ "loss": 9.4159,
242
+ "step": 360
243
+ },
244
+ {
245
+ "epoch": 0.65,
246
+ "learning_rate": 4.58838821490468e-05,
247
+ "loss": 9.6264,
248
+ "step": 380
249
+ },
250
+ {
251
+ "epoch": 0.69,
252
+ "learning_rate": 4.566724436741768e-05,
253
+ "loss": 9.2833,
254
+ "step": 400
255
+ },
256
+ {
257
+ "epoch": 0.69,
258
+ "eval_distillation_accuracy_counterfactual": 0.5495125705489995,
259
+ "eval_distillation_accuracy_factual": 0.8494099538224731,
260
+ "eval_distillation_f1_counterfactual": 0.48234074968553386,
261
+ "eval_distillation_f1_factual": 0.834773556914336,
262
+ "eval_groundtruth_accuracy_counterfactual": 0.5266803488968702,
263
+ "eval_groundtruth_f1_counterfactual": 0.4613217521128874,
264
+ "eval_groundtruth_f1_factual": 0.7080154702931097,
265
+ "eval_icace_cosine": 0.4618784785270691,
266
+ "eval_icace_l2": 0.6433539986610413,
267
+ "eval_icace_normdiff": 0.40616923570632935,
268
+ "eval_loss": 8.343696594238281,
269
+ "eval_runtime": 15.6191,
270
+ "eval_samples_per_second": 249.565,
271
+ "eval_steps_per_second": 1.985,
272
+ "step": 400
273
+ },
274
+ {
275
+ "epoch": 0.72,
276
+ "learning_rate": 4.5450606585788563e-05,
277
+ "loss": 9.3389,
278
+ "step": 420
279
+ },
280
+ {
281
+ "epoch": 0.75,
282
+ "learning_rate": 4.5233968804159446e-05,
283
+ "loss": 9.2901,
284
+ "step": 440
285
+ },
286
+ {
287
+ "epoch": 0.77,
288
+ "eval_distillation_accuracy_counterfactual": 0.4989738327347358,
289
+ "eval_distillation_accuracy_factual": 0.8601847101077476,
290
+ "eval_distillation_f1_counterfactual": 0.33860612154049907,
291
+ "eval_distillation_f1_factual": 0.8490636185423319,
292
+ "eval_groundtruth_accuracy_counterfactual": 0.4892252437147255,
293
+ "eval_groundtruth_f1_counterfactual": 0.33971729579938115,
294
+ "eval_groundtruth_f1_factual": 0.7307035270822468,
295
+ "eval_icace_cosine": 0.475065141916275,
296
+ "eval_icace_l2": 0.6995478272438049,
297
+ "eval_icace_normdiff": 0.440874308347702,
298
+ "eval_loss": 8.762495994567871,
299
+ "eval_runtime": 13.0872,
300
+ "eval_samples_per_second": 297.848,
301
+ "eval_steps_per_second": 2.369,
302
+ "step": 450
303
+ },
304
+ {
305
+ "epoch": 0.79,
306
+ "learning_rate": 4.501733102253033e-05,
307
+ "loss": 9.3355,
308
+ "step": 460
309
+ },
310
+ {
311
+ "epoch": 0.82,
312
+ "learning_rate": 4.480069324090121e-05,
313
+ "loss": 9.1845,
314
+ "step": 480
315
+ },
316
+ {
317
+ "epoch": 0.86,
318
+ "learning_rate": 4.45840554592721e-05,
319
+ "loss": 9.2082,
320
+ "step": 500
321
+ },
322
+ {
323
+ "epoch": 0.86,
324
+ "eval_distillation_accuracy_counterfactual": 0.5823499230374551,
325
+ "eval_distillation_accuracy_factual": 0.8514622883530015,
326
+ "eval_distillation_f1_counterfactual": 0.49132364435744985,
327
+ "eval_distillation_f1_factual": 0.8334607656933123,
328
+ "eval_groundtruth_accuracy_counterfactual": 0.5482298614674191,
329
+ "eval_groundtruth_f1_counterfactual": 0.46237812507137493,
330
+ "eval_groundtruth_f1_factual": 0.7129235668323691,
331
+ "eval_icace_cosine": 0.45347079634666443,
332
+ "eval_icace_l2": 0.6185017824172974,
333
+ "eval_icace_normdiff": 0.3950257897377014,
334
+ "eval_loss": 8.00942611694336,
335
+ "eval_runtime": 13.0722,
336
+ "eval_samples_per_second": 298.191,
337
+ "eval_steps_per_second": 2.371,
338
+ "step": 500
339
+ }
340
+ ],
341
+ "max_steps": 4616,
342
+ "num_train_epochs": 8,
343
+ "total_flos": 4209890279424000.0,
344
+ "trial_name": null,
345
+ "trial_params": null
346
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1673c25df66ab430cf5640069391befa925c7fce30623a5c929351ed4e140928
3
+ size 3183
vocab.json ADDED
The diff for this file is too large to render. See raw diff