garrettmeek123 committed on
Commit
6971664
·
verified ·
1 Parent(s): a735905

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 1,
7
+ "classifier_dropout": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 512,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2048,
15
+ "layer_norm_eps": 1e-12,
16
+ "max_position_embeddings": 512,
17
+ "model_type": "bert",
18
+ "num_attention_heads": 8,
19
+ "num_hidden_layers": 6,
20
+ "pad_token_id": 0,
21
+ "position_embedding_type": "absolute",
22
+ "problem_type": "single_label_classification",
23
+ "transformers_version": "4.57.1",
24
+ "type_vocab_size": 2,
25
+ "use_cache": true,
26
+ "vocab_size": 50000
27
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf6bac3ed8ab13a04a58e3c3771bf8618783ca89cd3fa3a0b334a2652e4525ec
3
+ size 180180928
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d52e77d86d8a3374a90a8639ba3e028c2aa0f5af893f663cdbea531b8a2eca5c
3
+ size 360427403
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2d8f17dbce2c1ba7b05953933032e3f312f06107888bdafc970bda4de84ee35
3
+ size 14709
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5da3e3f1943486073868b2c53a3c36ae7ff342ba9c42bdf7baaa7e2bf91bba4
3
+ size 1383
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6577149420304ede87aa59bf995d3982b1e1dce8c799a97d829347f2ac691371
3
+ size 1465
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<pad>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "bos_token": "<s>",
37
+ "clean_up_tokenization_spaces": true,
38
+ "eos_token": "</s>",
39
+ "extra_special_tokens": {},
40
+ "model_max_length": 512,
41
+ "pad_token": "<pad>",
42
+ "tokenizer_class": "PreTrainedTokenizerFast",
43
+ "unk_token": "<unk>"
44
+ }
trainer_state.json ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 2049,
3
+ "best_metric": 0.6427525622254758,
4
+ "best_model_checkpoint": "./saved_models/cbert/checkpoint-2049",
5
+ "epoch": 5.0,
6
+ "eval_steps": 500,
7
+ "global_step": 3415,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.14641288433382138,
14
+ "grad_norm": 3.022106647491455,
15
+ "learning_rate": 1.942020497803807e-05,
16
+ "loss": 0.6936,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.29282576866764276,
21
+ "grad_norm": 1.8588309288024902,
22
+ "learning_rate": 1.8834553440702785e-05,
23
+ "loss": 0.6835,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.43923865300146414,
28
+ "grad_norm": 3.637296438217163,
29
+ "learning_rate": 1.8248901903367496e-05,
30
+ "loss": 0.6754,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.5856515373352855,
35
+ "grad_norm": 1.5142912864685059,
36
+ "learning_rate": 1.766325036603221e-05,
37
+ "loss": 0.6739,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.7320644216691069,
42
+ "grad_norm": 4.135972023010254,
43
+ "learning_rate": 1.7077598828696925e-05,
44
+ "loss": 0.6699,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.8784773060029283,
49
+ "grad_norm": 2.758687734603882,
50
+ "learning_rate": 1.649194729136164e-05,
51
+ "loss": 0.6775,
52
+ "step": 600
53
+ },
54
+ {
55
+ "epoch": 1.0,
56
+ "eval_accuracy": 0.5995607613469985,
57
+ "eval_loss": 0.6492753028869629,
58
+ "eval_runtime": 1.5709,
59
+ "eval_samples_per_second": 1739.163,
60
+ "eval_steps_per_second": 54.747,
61
+ "step": 683
62
+ },
63
+ {
64
+ "epoch": 1.0248901903367496,
65
+ "grad_norm": 4.188994407653809,
66
+ "learning_rate": 1.5906295754026355e-05,
67
+ "loss": 0.6622,
68
+ "step": 700
69
+ },
70
+ {
71
+ "epoch": 1.171303074670571,
72
+ "grad_norm": 2.755507230758667,
73
+ "learning_rate": 1.532064421669107e-05,
74
+ "loss": 0.6485,
75
+ "step": 800
76
+ },
77
+ {
78
+ "epoch": 1.3177159590043923,
79
+ "grad_norm": 4.434422016143799,
80
+ "learning_rate": 1.4734992679355784e-05,
81
+ "loss": 0.6422,
82
+ "step": 900
83
+ },
84
+ {
85
+ "epoch": 1.4641288433382138,
86
+ "grad_norm": 3.1107141971588135,
87
+ "learning_rate": 1.4149341142020499e-05,
88
+ "loss": 0.6436,
89
+ "step": 1000
90
+ },
91
+ {
92
+ "epoch": 1.610541727672035,
93
+ "grad_norm": 4.113313674926758,
94
+ "learning_rate": 1.3563689604685213e-05,
95
+ "loss": 0.6308,
96
+ "step": 1100
97
+ },
98
+ {
99
+ "epoch": 1.7569546120058566,
100
+ "grad_norm": 5.160773754119873,
101
+ "learning_rate": 1.2978038067349928e-05,
102
+ "loss": 0.6223,
103
+ "step": 1200
104
+ },
105
+ {
106
+ "epoch": 1.903367496339678,
107
+ "grad_norm": 2.9103806018829346,
108
+ "learning_rate": 1.2392386530014641e-05,
109
+ "loss": 0.6312,
110
+ "step": 1300
111
+ },
112
+ {
113
+ "epoch": 2.0,
114
+ "eval_accuracy": 0.6317715959004392,
115
+ "eval_loss": 0.6276857256889343,
116
+ "eval_runtime": 1.5723,
117
+ "eval_samples_per_second": 1737.632,
118
+ "eval_steps_per_second": 54.699,
119
+ "step": 1366
120
+ },
121
+ {
122
+ "epoch": 2.049780380673499,
123
+ "grad_norm": 4.584695339202881,
124
+ "learning_rate": 1.1806734992679356e-05,
125
+ "loss": 0.6105,
126
+ "step": 1400
127
+ },
128
+ {
129
+ "epoch": 2.1961932650073206,
130
+ "grad_norm": 11.84752368927002,
131
+ "learning_rate": 1.122108345534407e-05,
132
+ "loss": 0.5812,
133
+ "step": 1500
134
+ },
135
+ {
136
+ "epoch": 2.342606149341142,
137
+ "grad_norm": 4.095500946044922,
138
+ "learning_rate": 1.0635431918008785e-05,
139
+ "loss": 0.589,
140
+ "step": 1600
141
+ },
142
+ {
143
+ "epoch": 2.4890190336749636,
144
+ "grad_norm": 4.87593936920166,
145
+ "learning_rate": 1.00497803806735e-05,
146
+ "loss": 0.5835,
147
+ "step": 1700
148
+ },
149
+ {
150
+ "epoch": 2.6354319180087846,
151
+ "grad_norm": 4.864409446716309,
152
+ "learning_rate": 9.464128843338215e-06,
153
+ "loss": 0.5877,
154
+ "step": 1800
155
+ },
156
+ {
157
+ "epoch": 2.781844802342606,
158
+ "grad_norm": 5.729854583740234,
159
+ "learning_rate": 8.87847730600293e-06,
160
+ "loss": 0.5911,
161
+ "step": 1900
162
+ },
163
+ {
164
+ "epoch": 2.9282576866764276,
165
+ "grad_norm": 9.67723560333252,
166
+ "learning_rate": 8.292825768667644e-06,
167
+ "loss": 0.5689,
168
+ "step": 2000
169
+ },
170
+ {
171
+ "epoch": 3.0,
172
+ "eval_accuracy": 0.6427525622254758,
173
+ "eval_loss": 0.6189645528793335,
174
+ "eval_runtime": 1.5647,
175
+ "eval_samples_per_second": 1746.065,
176
+ "eval_steps_per_second": 54.964,
177
+ "step": 2049
178
+ },
179
+ {
180
+ "epoch": 3.074670571010249,
181
+ "grad_norm": 5.034663677215576,
182
+ "learning_rate": 7.707174231332359e-06,
183
+ "loss": 0.5712,
184
+ "step": 2100
185
+ },
186
+ {
187
+ "epoch": 3.22108345534407,
188
+ "grad_norm": 4.707498550415039,
189
+ "learning_rate": 7.1215226939970725e-06,
190
+ "loss": 0.5292,
191
+ "step": 2200
192
+ },
193
+ {
194
+ "epoch": 3.3674963396778916,
195
+ "grad_norm": 4.101770401000977,
196
+ "learning_rate": 6.535871156661787e-06,
197
+ "loss": 0.5367,
198
+ "step": 2300
199
+ },
200
+ {
201
+ "epoch": 3.513909224011713,
202
+ "grad_norm": 6.7061614990234375,
203
+ "learning_rate": 5.950219619326502e-06,
204
+ "loss": 0.542,
205
+ "step": 2400
206
+ },
207
+ {
208
+ "epoch": 3.660322108345534,
209
+ "grad_norm": 10.22519588470459,
210
+ "learning_rate": 5.364568081991216e-06,
211
+ "loss": 0.5486,
212
+ "step": 2500
213
+ },
214
+ {
215
+ "epoch": 3.8067349926793557,
216
+ "grad_norm": 4.292933940887451,
217
+ "learning_rate": 4.77891654465593e-06,
218
+ "loss": 0.5331,
219
+ "step": 2600
220
+ },
221
+ {
222
+ "epoch": 3.953147877013177,
223
+ "grad_norm": 4.438432216644287,
224
+ "learning_rate": 4.193265007320644e-06,
225
+ "loss": 0.5361,
226
+ "step": 2700
227
+ },
228
+ {
229
+ "epoch": 4.0,
230
+ "eval_accuracy": 0.6295754026354319,
231
+ "eval_loss": 0.633277177810669,
232
+ "eval_runtime": 1.5589,
233
+ "eval_samples_per_second": 1752.491,
234
+ "eval_steps_per_second": 55.166,
235
+ "step": 2732
236
+ },
237
+ {
238
+ "epoch": 4.099560761346998,
239
+ "grad_norm": 5.632227420806885,
240
+ "learning_rate": 3.607613469985359e-06,
241
+ "loss": 0.5327,
242
+ "step": 2800
243
+ },
244
+ {
245
+ "epoch": 4.24597364568082,
246
+ "grad_norm": 8.069804191589355,
247
+ "learning_rate": 3.0219619326500732e-06,
248
+ "loss": 0.5111,
249
+ "step": 2900
250
+ },
251
+ {
252
+ "epoch": 4.392386530014641,
253
+ "grad_norm": 7.2090840339660645,
254
+ "learning_rate": 2.436310395314788e-06,
255
+ "loss": 0.4965,
256
+ "step": 3000
257
+ },
258
+ {
259
+ "epoch": 4.538799414348462,
260
+ "grad_norm": 10.964486122131348,
261
+ "learning_rate": 1.8506588579795024e-06,
262
+ "loss": 0.5082,
263
+ "step": 3100
264
+ },
265
+ {
266
+ "epoch": 4.685212298682284,
267
+ "grad_norm": 4.838050365447998,
268
+ "learning_rate": 1.2650073206442169e-06,
269
+ "loss": 0.497,
270
+ "step": 3200
271
+ },
272
+ {
273
+ "epoch": 4.831625183016105,
274
+ "grad_norm": 6.126997470855713,
275
+ "learning_rate": 6.793557833089313e-07,
276
+ "loss": 0.5137,
277
+ "step": 3300
278
+ },
279
+ {
280
+ "epoch": 4.978038067349927,
281
+ "grad_norm": 13.103296279907227,
282
+ "learning_rate": 9.370424597364569e-08,
283
+ "loss": 0.5202,
284
+ "step": 3400
285
+ },
286
+ {
287
+ "epoch": 5.0,
288
+ "eval_accuracy": 0.6372620790629575,
289
+ "eval_loss": 0.6495257019996643,
290
+ "eval_runtime": 1.5569,
291
+ "eval_samples_per_second": 1754.803,
292
+ "eval_steps_per_second": 55.239,
293
+ "step": 3415
294
+ }
295
+ ],
296
+ "logging_steps": 100,
297
+ "max_steps": 3415,
298
+ "num_input_tokens_seen": 0,
299
+ "num_train_epochs": 5,
300
+ "save_steps": 500,
301
+ "stateful_callbacks": {
302
+ "TrainerControl": {
303
+ "args": {
304
+ "should_epoch_stop": false,
305
+ "should_evaluate": false,
306
+ "should_log": false,
307
+ "should_save": true,
308
+ "should_training_stop": true
309
+ },
310
+ "attributes": {}
311
+ }
312
+ },
313
+ "total_flos": 3218980489267200.0,
314
+ "train_batch_size": 16,
315
+ "trial_name": null,
316
+ "trial_params": null
317
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60296b0ac67aa42d8068a139559567ccac3ac42ca392159292ed93f2f8628b44
3
+ size 5777