rabie-karouia commited on
Commit
8c08997
·
verified ·
1 Parent(s): f38dcac

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.3,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.3,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "positive",
13
+ "1": "neutral",
14
+ "2": "negative"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "negative": 2,
20
+ "neutral": 1,
21
+ "positive": 0
22
+ },
23
+ "layer_norm_eps": 1e-12,
24
+ "max_position_embeddings": 512,
25
+ "model_type": "bert",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "position_embedding_type": "absolute",
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.51.3",
32
+ "type_vocab_size": 2,
33
+ "use_cache": true,
34
+ "vocab_size": 30522
35
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1deb9be85df5cf2caebaf89eb96edf2726065eaa82ed65a98977f951b9a359de
3
+ size 437961724
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39babeebdf045a0667104ca81277d52c20a3c02a286ae7cadc068f15e0b946a5
3
+ size 876039307
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44dcd01f34bcb1442f88d5e0041a9b57f83bc68bda81b16db67500d575523285
3
+ size 14455
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97c7b7b7b107f6d3c881356e647fadedace35248f4a6a75c5720b5b2fa06ee6e
3
+ size 1465
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
trainer_state.json ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 872,
3
+ "best_metric": 0.8792872839502528,
4
+ "best_model_checkpoint": "finbert-news\\checkpoint-872",
5
+ "epoch": 8.0,
6
+ "eval_steps": 500,
7
+ "global_step": 872,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.22935779816513763,
14
+ "grad_norm": 14.148714065551758,
15
+ "learning_rate": 5.4545454545454545e-06,
16
+ "loss": 1.4992,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.45871559633027525,
21
+ "grad_norm": 3.876201629638672,
22
+ "learning_rate": 1.1136363636363637e-05,
23
+ "loss": 1.1135,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 0.6880733944954128,
28
+ "grad_norm": 5.878354072570801,
29
+ "learning_rate": 1.681818181818182e-05,
30
+ "loss": 1.0131,
31
+ "step": 75
32
+ },
33
+ {
34
+ "epoch": 0.9174311926605505,
35
+ "grad_norm": 8.439997673034668,
36
+ "learning_rate": 1.999028701591291e-05,
37
+ "loss": 0.9438,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 1.0,
42
+ "eval_accuracy": 0.6847545219638242,
43
+ "eval_best_tau": 0.3,
44
+ "eval_f1": 0.676566431827065,
45
+ "eval_loss": 0.7978772521018982,
46
+ "eval_runtime": 25.6096,
47
+ "eval_samples_per_second": 15.111,
48
+ "eval_steps_per_second": 0.508,
49
+ "step": 109
50
+ },
51
+ {
52
+ "epoch": 1.146788990825688,
53
+ "grad_norm": 11.146471977233887,
54
+ "learning_rate": 1.9896130185675263e-05,
55
+ "loss": 0.8281,
56
+ "step": 125
57
+ },
58
+ {
59
+ "epoch": 1.3761467889908257,
60
+ "grad_norm": 13.593254089355469,
61
+ "learning_rate": 1.9702741799106508e-05,
62
+ "loss": 0.7709,
63
+ "step": 150
64
+ },
65
+ {
66
+ "epoch": 1.6055045871559632,
67
+ "grad_norm": 18.543376922607422,
68
+ "learning_rate": 1.9412061021336404e-05,
69
+ "loss": 0.6926,
70
+ "step": 175
71
+ },
72
+ {
73
+ "epoch": 1.834862385321101,
74
+ "grad_norm": 8.12672233581543,
75
+ "learning_rate": 1.9027002598375012e-05,
76
+ "loss": 0.7276,
77
+ "step": 200
78
+ },
79
+ {
80
+ "epoch": 2.0,
81
+ "eval_accuracy": 0.7829457364341085,
82
+ "eval_best_tau": 0.3,
83
+ "eval_f1": 0.7652186280745745,
84
+ "eval_loss": 0.5902541875839233,
85
+ "eval_runtime": 26.2346,
86
+ "eval_samples_per_second": 14.752,
87
+ "eval_steps_per_second": 0.496,
88
+ "step": 218
89
+ },
90
+ {
91
+ "epoch": 2.0642201834862384,
92
+ "grad_norm": 19.604272842407227,
93
+ "learning_rate": 1.8551427630053464e-05,
94
+ "loss": 0.6274,
95
+ "step": 225
96
+ },
97
+ {
98
+ "epoch": 2.293577981651376,
99
+ "grad_norm": 14.275187492370605,
100
+ "learning_rate": 1.7990104853582494e-05,
101
+ "loss": 0.5545,
102
+ "step": 250
103
+ },
104
+ {
105
+ "epoch": 2.522935779816514,
106
+ "grad_norm": 5.522476673126221,
107
+ "learning_rate": 1.7348662825950356e-05,
108
+ "loss": 0.6335,
109
+ "step": 275
110
+ },
111
+ {
112
+ "epoch": 2.7522935779816513,
113
+ "grad_norm": 13.290278434753418,
114
+ "learning_rate": 1.6633533484642104e-05,
115
+ "loss": 0.5528,
116
+ "step": 300
117
+ },
118
+ {
119
+ "epoch": 2.981651376146789,
120
+ "grad_norm": 11.074273109436035,
121
+ "learning_rate": 1.5851887652614238e-05,
122
+ "loss": 0.5402,
123
+ "step": 325
124
+ },
125
+ {
126
+ "epoch": 3.0,
127
+ "eval_accuracy": 0.8268733850129198,
128
+ "eval_best_tau": 0.3,
129
+ "eval_f1": 0.815201593241898,
130
+ "eval_loss": 0.49993327260017395,
131
+ "eval_runtime": 26.1993,
132
+ "eval_samples_per_second": 14.771,
133
+ "eval_steps_per_second": 0.496,
134
+ "step": 327
135
+ },
136
+ {
137
+ "epoch": 3.2110091743119265,
138
+ "grad_norm": 11.70258617401123,
139
+ "learning_rate": 1.5011563134236408e-05,
140
+ "loss": 0.4388,
141
+ "step": 350
142
+ },
143
+ {
144
+ "epoch": 3.4403669724770642,
145
+ "grad_norm": 15.00348949432373,
146
+ "learning_rate": 1.4120986123204257e-05,
147
+ "loss": 0.3964,
148
+ "step": 375
149
+ },
150
+ {
151
+ "epoch": 3.669724770642202,
152
+ "grad_norm": 13.193717002868652,
153
+ "learning_rate": 1.3189086710490649e-05,
154
+ "loss": 0.424,
155
+ "step": 400
156
+ },
157
+ {
158
+ "epoch": 3.8990825688073394,
159
+ "grad_norm": 17.48609161376953,
160
+ "learning_rate": 1.2225209339563144e-05,
161
+ "loss": 0.4301,
162
+ "step": 425
163
+ },
164
+ {
165
+ "epoch": 4.0,
166
+ "eval_accuracy": 0.8604651162790697,
167
+ "eval_best_tau": 0.3,
168
+ "eval_f1": 0.8555597375519169,
169
+ "eval_loss": 0.4141731262207031,
170
+ "eval_runtime": 25.6623,
171
+ "eval_samples_per_second": 15.08,
172
+ "eval_steps_per_second": 0.507,
173
+ "step": 436
174
+ },
175
+ {
176
+ "epoch": 4.128440366972477,
177
+ "grad_norm": 29.30590057373047,
178
+ "learning_rate": 1.1239019106760909e-05,
179
+ "loss": 0.3741,
180
+ "step": 450
181
+ },
182
+ {
183
+ "epoch": 4.3577981651376145,
184
+ "grad_norm": 12.956811904907227,
185
+ "learning_rate": 1.024040484638617e-05,
186
+ "loss": 0.3516,
187
+ "step": 475
188
+ },
189
+ {
190
+ "epoch": 4.587155963302752,
191
+ "grad_norm": 13.52210807800293,
192
+ "learning_rate": 9.239379972305992e-06,
193
+ "loss": 0.3391,
194
+ "step": 500
195
+ },
196
+ {
197
+ "epoch": 4.81651376146789,
198
+ "grad_norm": 15.72844123840332,
199
+ "learning_rate": 8.245982070356186e-06,
200
+ "loss": 0.312,
201
+ "step": 525
202
+ },
203
+ {
204
+ "epoch": 5.0,
205
+ "eval_accuracy": 0.8811369509043928,
206
+ "eval_best_tau": 0.3,
207
+ "eval_f1": 0.8760670434788734,
208
+ "eval_loss": 0.3646778464317322,
209
+ "eval_runtime": 25.7825,
210
+ "eval_samples_per_second": 15.01,
211
+ "eval_steps_per_second": 0.504,
212
+ "step": 545
213
+ },
214
+ {
215
+ "epoch": 5.045871559633028,
216
+ "grad_norm": 13.13558578491211,
217
+ "learning_rate": 7.270172248365468e-06,
218
+ "loss": 0.3204,
219
+ "step": 550
220
+ },
221
+ {
222
+ "epoch": 5.275229357798165,
223
+ "grad_norm": 14.52351188659668,
224
+ "learning_rate": 6.321735253048214e-06,
225
+ "loss": 0.2749,
226
+ "step": 575
227
+ },
228
+ {
229
+ "epoch": 5.504587155963303,
230
+ "grad_norm": 11.246268272399902,
231
+ "learning_rate": 5.410181355324622e-06,
232
+ "loss": 0.2962,
233
+ "step": 600
234
+ },
235
+ {
236
+ "epoch": 5.73394495412844,
237
+ "grad_norm": 21.78761100769043,
238
+ "learning_rate": 4.544650987894515e-06,
239
+ "loss": 0.2512,
240
+ "step": 625
241
+ },
242
+ {
243
+ "epoch": 5.963302752293578,
244
+ "grad_norm": 14.13925552368164,
245
+ "learning_rate": 3.733823091293274e-06,
246
+ "loss": 0.2983,
247
+ "step": 650
248
+ },
249
+ {
250
+ "epoch": 6.0,
251
+ "eval_accuracy": 0.8837209302325582,
252
+ "eval_best_tau": 0.3,
253
+ "eval_f1": 0.8773838929062463,
254
+ "eval_loss": 0.37674975395202637,
255
+ "eval_runtime": 26.3938,
256
+ "eval_samples_per_second": 14.663,
257
+ "eval_steps_per_second": 0.493,
258
+ "step": 654
259
+ },
260
+ {
261
+ "epoch": 6.192660550458716,
262
+ "grad_norm": 6.343225002288818,
263
+ "learning_rate": 2.9858280874723833e-06,
264
+ "loss": 0.266,
265
+ "step": 675
266
+ },
267
+ {
268
+ "epoch": 6.422018348623853,
269
+ "grad_norm": 16.15406036376953,
270
+ "learning_rate": 2.3081663535453736e-06,
271
+ "loss": 0.2446,
272
+ "step": 700
273
+ },
274
+ {
275
+ "epoch": 6.651376146788991,
276
+ "grad_norm": 12.924860000610352,
277
+ "learning_rate": 1.7076330131880525e-06,
278
+ "loss": 0.2401,
279
+ "step": 725
280
+ },
281
+ {
282
+ "epoch": 6.8807339449541285,
283
+ "grad_norm": 13.268035888671875,
284
+ "learning_rate": 1.1902497998330065e-06,
285
+ "loss": 0.2426,
286
+ "step": 750
287
+ },
288
+ {
289
+ "epoch": 7.0,
290
+ "eval_accuracy": 0.8811369509043928,
291
+ "eval_best_tau": 0.3,
292
+ "eval_f1": 0.8763676520557254,
293
+ "eval_loss": 0.3656945824623108,
294
+ "eval_runtime": 25.7215,
295
+ "eval_samples_per_second": 15.046,
296
+ "eval_steps_per_second": 0.505,
297
+ "step": 763
298
+ },
299
+ {
300
+ "epoch": 7.110091743119266,
301
+ "grad_norm": 4.727287769317627,
302
+ "learning_rate": 7.612046748871327e-07,
303
+ "loss": 0.2214,
304
+ "step": 775
305
+ },
306
+ {
307
+ "epoch": 7.339449541284404,
308
+ "grad_norm": 10.04178524017334,
309
+ "learning_rate": 4.247998064389458e-07,
310
+ "loss": 0.1936,
311
+ "step": 800
312
+ },
313
+ {
314
+ "epoch": 7.568807339449541,
315
+ "grad_norm": 24.02179527282715,
316
+ "learning_rate": 1.844084300893456e-07,
317
+ "loss": 0.2567,
318
+ "step": 825
319
+ },
320
+ {
321
+ "epoch": 7.798165137614679,
322
+ "grad_norm": 7.141385555267334,
323
+ "learning_rate": 4.244102447555909e-08,
324
+ "loss": 0.1959,
325
+ "step": 850
326
+ },
327
+ {
328
+ "epoch": 8.0,
329
+ "eval_accuracy": 0.8837209302325582,
330
+ "eval_best_tau": 0.3,
331
+ "eval_f1": 0.8792872839502528,
332
+ "eval_loss": 0.3580004572868347,
333
+ "eval_runtime": 26.2238,
334
+ "eval_samples_per_second": 14.758,
335
+ "eval_steps_per_second": 0.496,
336
+ "step": 872
337
+ }
338
+ ],
339
+ "logging_steps": 25,
340
+ "max_steps": 872,
341
+ "num_input_tokens_seen": 0,
342
+ "num_train_epochs": 8,
343
+ "save_steps": 500,
344
+ "stateful_callbacks": {
345
+ "EarlyStoppingCallback": {
346
+ "args": {
347
+ "early_stopping_patience": 2,
348
+ "early_stopping_threshold": 0.0
349
+ },
350
+ "attributes": {
351
+ "early_stopping_patience_counter": 0
352
+ }
353
+ },
354
+ "TrainerControl": {
355
+ "args": {
356
+ "should_epoch_stop": false,
357
+ "should_evaluate": false,
358
+ "should_log": false,
359
+ "should_save": true,
360
+ "should_training_stop": true
361
+ },
362
+ "attributes": {}
363
+ }
364
+ },
365
+ "total_flos": 1832848067893248.0,
366
+ "train_batch_size": 16,
367
+ "trial_name": null,
368
+ "trial_params": null
369
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d6ec71644c6b78e2400d55adb6f12088c3cc76e753a8f44c30ed0fe2d93fcdf
3
+ size 5649
vocab.txt ADDED
The diff for this file is too large to render. See raw diff