sayedshaungt committed on
Commit
e742c9e
·
verified ·
1 Parent(s): 038659e

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "apply_position_embeddings": true,
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": 0.0,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3",
16
+ "4": "LABEL_4",
17
+ "5": "LABEL_5",
18
+ "6": "LABEL_6",
19
+ "7": "LABEL_7",
20
+ "8": "LABEL_8",
21
+ "9": "LABEL_9",
22
+ "10": "LABEL_10",
23
+ "11": "LABEL_11",
24
+ "12": "LABEL_12",
25
+ "13": "LABEL_13",
26
+ "14": "LABEL_14"
27
+ },
28
+ "initializer_range": 0.01,
29
+ "intermediate_size": 3072,
30
+ "label2id": {
31
+ "LABEL_0": 0,
32
+ "LABEL_1": 1,
33
+ "LABEL_10": 10,
34
+ "LABEL_11": 11,
35
+ "LABEL_12": 12,
36
+ "LABEL_13": 13,
37
+ "LABEL_14": 14,
38
+ "LABEL_2": 2,
39
+ "LABEL_3": 3,
40
+ "LABEL_4": 4,
41
+ "LABEL_5": 5,
42
+ "LABEL_6": 6,
43
+ "LABEL_7": 7,
44
+ "LABEL_8": 8,
45
+ "LABEL_9": 9
46
+ },
47
+ "layer_norm_eps": 1e-12,
48
+ "max_position_embeddings": 512,
49
+ "model_type": "bert",
50
+ "num_attention_heads": 12,
51
+ "num_hidden_layers": 12,
52
+ "pad_token_id": 4,
53
+ "position_embedding_type": "relative_key_query",
54
+ "torch_dtype": "float32",
55
+ "transformers_version": "4.51.3",
56
+ "type_vocab_size": 2,
57
+ "use_cache": true,
58
+ "vocab_size": 50000
59
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a96f6341feda31f59181f8c8ab87447aa863cfff31dc2bb1030769ab72c6f7c9
3
+ size 498616780
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[CLS]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[SEP]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[UNK]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[MASK]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[PAD]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_text": false,
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "[CLS]",
47
+ "do_basic_tokenize": true,
48
+ "do_lower_case": true,
49
+ "extra_special_tokens": {},
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "never_split": null,
53
+ "pad_token": "[PAD]",
54
+ "sep_token": "[SEP]",
55
+ "strip_accents": false,
56
+ "tokenize_chinese_chars": false,
57
+ "tokenizer_class": "BertTokenizer",
58
+ "unk_token": "[UNK]"
59
+ }
trainer_state.json ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 17000,
3
+ "best_metric": 0.8946104799920928,
4
+ "best_model_checkpoint": "banBERT-Base-pos/checkpoint-17000",
5
+ "epoch": 0.9946755602363817,
6
+ "eval_steps": 1000,
7
+ "global_step": 17000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.05851032707272834,
14
+ "grad_norm": 4.870177745819092,
15
+ "learning_rate": 4.952324665090623e-05,
16
+ "loss": 0.5681,
17
+ "step": 1000
18
+ },
19
+ {
20
+ "epoch": 0.05851032707272834,
21
+ "eval_f1": 0.8762604390800369,
22
+ "eval_loss": 0.26431742310523987,
23
+ "eval_precision": 0.8742650838051628,
24
+ "eval_recall": 0.8796261556029517,
25
+ "eval_runtime": 28.9994,
26
+ "eval_samples_per_second": 261.178,
27
+ "eval_steps_per_second": 32.656,
28
+ "step": 1000
29
+ },
30
+ {
31
+ "epoch": 0.11702065414545668,
32
+ "grad_norm": 6.504812240600586,
33
+ "learning_rate": 4.853821907013397e-05,
34
+ "loss": 0.2821,
35
+ "step": 2000
36
+ },
37
+ {
38
+ "epoch": 0.11702065414545668,
39
+ "eval_f1": 0.8747857038906649,
40
+ "eval_loss": 0.2531687021255493,
41
+ "eval_precision": 0.8826666555035545,
42
+ "eval_recall": 0.8742214967913846,
43
+ "eval_runtime": 29.3663,
44
+ "eval_samples_per_second": 257.915,
45
+ "eval_steps_per_second": 32.248,
46
+ "step": 2000
47
+ },
48
+ {
49
+ "epoch": 0.175530981218185,
50
+ "grad_norm": 2.781933307647705,
51
+ "learning_rate": 4.7553191489361704e-05,
52
+ "loss": 0.2697,
53
+ "step": 3000
54
+ },
55
+ {
56
+ "epoch": 0.175530981218185,
57
+ "eval_f1": 0.8666467093588278,
58
+ "eval_loss": 0.2597614824771881,
59
+ "eval_precision": 0.8736361925736554,
60
+ "eval_recall": 0.8661208831213065,
61
+ "eval_runtime": 29.0115,
62
+ "eval_samples_per_second": 261.068,
63
+ "eval_steps_per_second": 32.642,
64
+ "step": 3000
65
+ },
66
+ {
67
+ "epoch": 0.23404130829091335,
68
+ "grad_norm": 3.3094711303710938,
69
+ "learning_rate": 4.656816390858944e-05,
70
+ "loss": 0.2659,
71
+ "step": 4000
72
+ },
73
+ {
74
+ "epoch": 0.23404130829091335,
75
+ "eval_f1": 0.8798149861336466,
76
+ "eval_loss": 0.24752835929393768,
77
+ "eval_precision": 0.8764034593493383,
78
+ "eval_recall": 0.8851085241178042,
79
+ "eval_runtime": 29.6951,
80
+ "eval_samples_per_second": 255.059,
81
+ "eval_steps_per_second": 31.891,
82
+ "step": 4000
83
+ },
84
+ {
85
+ "epoch": 0.2925516353636417,
86
+ "grad_norm": 2.068115234375,
87
+ "learning_rate": 4.5583136327817186e-05,
88
+ "loss": 0.2608,
89
+ "step": 5000
90
+ },
91
+ {
92
+ "epoch": 0.2925516353636417,
93
+ "eval_f1": 0.8836360762583084,
94
+ "eval_loss": 0.23901157081127167,
95
+ "eval_precision": 0.8866831487439992,
96
+ "eval_recall": 0.8816467698684325,
97
+ "eval_runtime": 29.6967,
98
+ "eval_samples_per_second": 255.046,
99
+ "eval_steps_per_second": 31.889,
100
+ "step": 5000
101
+ },
102
+ {
103
+ "epoch": 0.35106196243637,
104
+ "grad_norm": 2.984527111053467,
105
+ "learning_rate": 4.459810874704492e-05,
106
+ "loss": 0.2548,
107
+ "step": 6000
108
+ },
109
+ {
110
+ "epoch": 0.35106196243637,
111
+ "eval_f1": 0.8786857503892376,
112
+ "eval_loss": 0.24374601244926453,
113
+ "eval_precision": 0.8808030269623512,
114
+ "eval_recall": 0.878183658913097,
115
+ "eval_runtime": 29.5386,
116
+ "eval_samples_per_second": 256.41,
117
+ "eval_steps_per_second": 32.06,
118
+ "step": 6000
119
+ },
120
+ {
121
+ "epoch": 0.40957228950909835,
122
+ "grad_norm": 3.5354621410369873,
123
+ "learning_rate": 4.3613081166272655e-05,
124
+ "loss": 0.2535,
125
+ "step": 7000
126
+ },
127
+ {
128
+ "epoch": 0.40957228950909835,
129
+ "eval_f1": 0.8885007809186033,
130
+ "eval_loss": 0.23431703448295593,
131
+ "eval_precision": 0.8823384881830838,
132
+ "eval_recall": 0.8961195227790774,
133
+ "eval_runtime": 28.9987,
134
+ "eval_samples_per_second": 261.184,
135
+ "eval_steps_per_second": 32.657,
136
+ "step": 7000
137
+ },
138
+ {
139
+ "epoch": 0.4680826165818267,
140
+ "grad_norm": 3.906360626220703,
141
+ "learning_rate": 4.2628053585500396e-05,
142
+ "loss": 0.255,
143
+ "step": 8000
144
+ },
145
+ {
146
+ "epoch": 0.4680826165818267,
147
+ "eval_f1": 0.8796937707178186,
148
+ "eval_loss": 0.23554810881614685,
149
+ "eval_precision": 0.8812208287855526,
150
+ "eval_recall": 0.880771461292737,
151
+ "eval_runtime": 30.6899,
152
+ "eval_samples_per_second": 246.791,
153
+ "eval_steps_per_second": 30.857,
154
+ "step": 8000
155
+ },
156
+ {
157
+ "epoch": 0.526592943654555,
158
+ "grad_norm": 3.616974353790283,
159
+ "learning_rate": 4.1644011032308906e-05,
160
+ "loss": 0.2457,
161
+ "step": 9000
162
+ },
163
+ {
164
+ "epoch": 0.526592943654555,
165
+ "eval_f1": 0.8853991026903275,
166
+ "eval_loss": 0.2313537746667862,
167
+ "eval_precision": 0.8879722004510241,
168
+ "eval_recall": 0.8847937369742619,
169
+ "eval_runtime": 29.3159,
170
+ "eval_samples_per_second": 258.358,
171
+ "eval_steps_per_second": 32.303,
172
+ "step": 9000
173
+ },
174
+ {
175
+ "epoch": 0.5851032707272834,
176
+ "grad_norm": 3.81512713432312,
177
+ "learning_rate": 4.065898345153665e-05,
178
+ "loss": 0.2527,
179
+ "step": 10000
180
+ },
181
+ {
182
+ "epoch": 0.5851032707272834,
183
+ "eval_f1": 0.8797925096491191,
184
+ "eval_loss": 0.2423781007528305,
185
+ "eval_precision": 0.8750872391747755,
186
+ "eval_recall": 0.8881566716696347,
187
+ "eval_runtime": 30.0187,
188
+ "eval_samples_per_second": 252.31,
189
+ "eval_steps_per_second": 31.547,
190
+ "step": 10000
191
+ },
192
+ {
193
+ "epoch": 0.6436135978000117,
194
+ "grad_norm": 2.189429521560669,
195
+ "learning_rate": 3.967494089834515e-05,
196
+ "loss": 0.2446,
197
+ "step": 11000
198
+ },
199
+ {
200
+ "epoch": 0.6436135978000117,
201
+ "eval_f1": 0.8923392397004614,
202
+ "eval_loss": 0.22613154351711273,
203
+ "eval_precision": 0.8840821992199377,
204
+ "eval_recall": 0.9019369581982146,
205
+ "eval_runtime": 30.4237,
206
+ "eval_samples_per_second": 248.951,
207
+ "eval_steps_per_second": 31.127,
208
+ "step": 11000
209
+ },
210
+ {
211
+ "epoch": 0.70212392487274,
212
+ "grad_norm": 3.8916683197021484,
213
+ "learning_rate": 3.86899133175729e-05,
214
+ "loss": 0.2496,
215
+ "step": 12000
216
+ },
217
+ {
218
+ "epoch": 0.70212392487274,
219
+ "eval_f1": 0.8838883711908031,
220
+ "eval_loss": 0.2282320261001587,
221
+ "eval_precision": 0.8870449139456895,
222
+ "eval_recall": 0.881842969912727,
223
+ "eval_runtime": 28.9189,
224
+ "eval_samples_per_second": 261.905,
225
+ "eval_steps_per_second": 32.747,
226
+ "step": 12000
227
+ },
228
+ {
229
+ "epoch": 0.7606342519454684,
230
+ "grad_norm": 2.145176887512207,
231
+ "learning_rate": 3.770587076438141e-05,
232
+ "loss": 0.2439,
233
+ "step": 13000
234
+ },
235
+ {
236
+ "epoch": 0.7606342519454684,
237
+ "eval_f1": 0.8769622250324842,
238
+ "eval_loss": 0.2252449244260788,
239
+ "eval_precision": 0.887928073773863,
240
+ "eval_recall": 0.8695732334120527,
241
+ "eval_runtime": 29.4593,
242
+ "eval_samples_per_second": 257.1,
243
+ "eval_steps_per_second": 32.146,
244
+ "step": 13000
245
+ },
246
+ {
247
+ "epoch": 0.8191445790181967,
248
+ "grad_norm": 1.7703274488449097,
249
+ "learning_rate": 3.672084318360914e-05,
250
+ "loss": 0.243,
251
+ "step": 14000
252
+ },
253
+ {
254
+ "epoch": 0.8191445790181967,
255
+ "eval_f1": 0.879454001823476,
256
+ "eval_loss": 0.22599145770072937,
257
+ "eval_precision": 0.8828080832134976,
258
+ "eval_recall": 0.8786664905760042,
259
+ "eval_runtime": 29.753,
260
+ "eval_samples_per_second": 254.562,
261
+ "eval_steps_per_second": 31.829,
262
+ "step": 14000
263
+ },
264
+ {
265
+ "epoch": 0.877654906090925,
266
+ "grad_norm": 4.403445243835449,
267
+ "learning_rate": 3.573581560283688e-05,
268
+ "loss": 0.2427,
269
+ "step": 15000
270
+ },
271
+ {
272
+ "epoch": 0.877654906090925,
273
+ "eval_f1": 0.8888557192580028,
274
+ "eval_loss": 0.22790007293224335,
275
+ "eval_precision": 0.8901524013282429,
276
+ "eval_recall": 0.8884489266872372,
277
+ "eval_runtime": 31.5334,
278
+ "eval_samples_per_second": 240.19,
279
+ "eval_steps_per_second": 30.032,
280
+ "step": 15000
281
+ },
282
+ {
283
+ "epoch": 0.9361652331636534,
284
+ "grad_norm": 6.978273868560791,
285
+ "learning_rate": 3.4751773049645395e-05,
286
+ "loss": 0.2371,
287
+ "step": 16000
288
+ },
289
+ {
290
+ "epoch": 0.9361652331636534,
291
+ "eval_f1": 0.8933984344595972,
292
+ "eval_loss": 0.22917409241199493,
293
+ "eval_precision": 0.8891663922870625,
294
+ "eval_recall": 0.8998411225843156,
295
+ "eval_runtime": 29.6746,
296
+ "eval_samples_per_second": 255.235,
297
+ "eval_steps_per_second": 31.913,
298
+ "step": 16000
299
+ },
300
+ {
301
+ "epoch": 0.9946755602363817,
302
+ "grad_norm": 4.8312859535217285,
303
+ "learning_rate": 3.376674546887313e-05,
304
+ "loss": 0.2405,
305
+ "step": 17000
306
+ },
307
+ {
308
+ "epoch": 0.9946755602363817,
309
+ "eval_f1": 0.8946104799920928,
310
+ "eval_loss": 0.2225894331932068,
311
+ "eval_precision": 0.8872567105766707,
312
+ "eval_recall": 0.903381031095196,
313
+ "eval_runtime": 29.592,
314
+ "eval_samples_per_second": 255.948,
315
+ "eval_steps_per_second": 32.002,
316
+ "step": 17000
317
+ }
318
+ ],
319
+ "logging_steps": 1000,
320
+ "max_steps": 51273,
321
+ "num_input_tokens_seen": 0,
322
+ "num_train_epochs": 3,
323
+ "save_steps": 1000,
324
+ "stateful_callbacks": {
325
+ "TrainerControl": {
326
+ "args": {
327
+ "should_epoch_stop": false,
328
+ "should_evaluate": false,
329
+ "should_log": false,
330
+ "should_save": true,
331
+ "should_training_stop": false
332
+ },
333
+ "attributes": {}
334
+ }
335
+ },
336
+ "total_flos": 4442566947840000.0,
337
+ "train_batch_size": 8,
338
+ "trial_name": null,
339
+ "trial_params": null
340
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff