Earlan Josh Sabillano commited on
Commit
cef6e4d
·
verified ·
1 Parent(s): b2652f8

Delete checkpoint-372

Browse files
checkpoint-372/config.json DELETED
@@ -1,51 +0,0 @@
1
- {
2
- "_name_or_path": "facebook/bart-large-mnli",
3
- "_num_labels": 3,
4
- "activation_dropout": 0.0,
5
- "activation_function": "gelu",
6
- "add_final_layer_norm": false,
7
- "architectures": [
8
- "BartForConditionalGeneration"
9
- ],
10
- "attention_dropout": 0.0,
11
- "bos_token_id": 0,
12
- "classif_dropout": 0.0,
13
- "classifier_dropout": 0.0,
14
- "d_model": 1024,
15
- "decoder_attention_heads": 16,
16
- "decoder_ffn_dim": 4096,
17
- "decoder_layerdrop": 0.0,
18
- "decoder_layers": 12,
19
- "decoder_start_token_id": 2,
20
- "dropout": 0.1,
21
- "encoder_attention_heads": 16,
22
- "encoder_ffn_dim": 4096,
23
- "encoder_layerdrop": 0.0,
24
- "encoder_layers": 12,
25
- "eos_token_id": 2,
26
- "forced_eos_token_id": 2,
27
- "gradient_checkpointing": false,
28
- "id2label": {
29
- "0": "contradiction",
30
- "1": "neutral",
31
- "2": "entailment"
32
- },
33
- "init_std": 0.02,
34
- "is_encoder_decoder": true,
35
- "label2id": {
36
- "contradiction": 0,
37
- "entailment": 2,
38
- "neutral": 1
39
- },
40
- "max_position_embeddings": 1024,
41
- "model_type": "bart",
42
- "normalize_before": false,
43
- "num_hidden_layers": 12,
44
- "output_past": false,
45
- "pad_token_id": 1,
46
- "scale_embedding": false,
47
- "torch_dtype": "float32",
48
- "transformers_version": "4.48.3",
49
- "use_cache": true,
50
- "vocab_size": 50265
51
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-372/generation_config.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "bos_token_id": 0,
4
- "decoder_start_token_id": 2,
5
- "eos_token_id": 2,
6
- "forced_eos_token_id": 2,
7
- "pad_token_id": 1,
8
- "transformers_version": "4.48.3"
9
- }
 
 
 
 
 
 
 
 
 
 
checkpoint-372/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f70cacbe02f71d7fc7c7aeb2eca84d54f4ba2622d459d4c3f69c85e0ba572715
3
- size 1625426996
 
 
 
 
checkpoint-372/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bb90e921d85a51f310351d5f6f9c254c166631639742aa0f8e8c969ac96003a
3
- size 3250759951
 
 
 
 
checkpoint-372/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3570a2905f26560fcccb4d2a2943795e881f47503488f0a90de8a71f2b6818a6
3
- size 14244
 
 
 
 
checkpoint-372/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3b5e6fdd4ac8947f62724067566ca74b82cd474980a3e5c15c66de64bd39e98
3
- size 1064
 
 
 
 
checkpoint-372/trainer_state.json DELETED
@@ -1,308 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 2.9809045226130655,
5
- "eval_steps": 500,
6
- "global_step": 372,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.08040201005025126,
13
- "grad_norm": 29.01942253112793,
14
- "learning_rate": 4.865591397849463e-05,
15
- "loss": 2.798,
16
- "step": 10
17
- },
18
- {
19
- "epoch": 0.16080402010050251,
20
- "grad_norm": 2.744927406311035,
21
- "learning_rate": 4.731182795698925e-05,
22
- "loss": 0.7463,
23
- "step": 20
24
- },
25
- {
26
- "epoch": 0.24120603015075376,
27
- "grad_norm": 23.628087997436523,
28
- "learning_rate": 4.596774193548387e-05,
29
- "loss": 0.6532,
30
- "step": 30
31
- },
32
- {
33
- "epoch": 0.32160804020100503,
34
- "grad_norm": 7.0060715675354,
35
- "learning_rate": 4.4623655913978496e-05,
36
- "loss": 0.6,
37
- "step": 40
38
- },
39
- {
40
- "epoch": 0.4020100502512563,
41
- "grad_norm": 4.830014228820801,
42
- "learning_rate": 4.327956989247312e-05,
43
- "loss": 0.4285,
44
- "step": 50
45
- },
46
- {
47
- "epoch": 0.4824120603015075,
48
- "grad_norm": 3.5819506645202637,
49
- "learning_rate": 4.1935483870967746e-05,
50
- "loss": 0.4012,
51
- "step": 60
52
- },
53
- {
54
- "epoch": 0.5628140703517588,
55
- "grad_norm": 11.144820213317871,
56
- "learning_rate": 4.0591397849462364e-05,
57
- "loss": 0.319,
58
- "step": 70
59
- },
60
- {
61
- "epoch": 0.6432160804020101,
62
- "grad_norm": 6.964443206787109,
63
- "learning_rate": 3.924731182795699e-05,
64
- "loss": 0.3855,
65
- "step": 80
66
- },
67
- {
68
- "epoch": 0.7236180904522613,
69
- "grad_norm": 10.514158248901367,
70
- "learning_rate": 3.7903225806451614e-05,
71
- "loss": 0.3164,
72
- "step": 90
73
- },
74
- {
75
- "epoch": 0.8040201005025126,
76
- "grad_norm": 2.0289254188537598,
77
- "learning_rate": 3.655913978494624e-05,
78
- "loss": 0.2692,
79
- "step": 100
80
- },
81
- {
82
- "epoch": 0.8844221105527639,
83
- "grad_norm": 1.9192312955856323,
84
- "learning_rate": 3.5215053763440864e-05,
85
- "loss": 0.2635,
86
- "step": 110
87
- },
88
- {
89
- "epoch": 0.964824120603015,
90
- "grad_norm": 6.680674076080322,
91
- "learning_rate": 3.387096774193548e-05,
92
- "loss": 0.3005,
93
- "step": 120
94
- },
95
- {
96
- "epoch": 1.0,
97
- "eval_loss": 0.2973356246948242,
98
- "eval_runtime": 8.055,
99
- "eval_samples_per_second": 247.052,
100
- "eval_steps_per_second": 30.913,
101
- "step": 125
102
- },
103
- {
104
- "epoch": 1.0402010050251256,
105
- "grad_norm": 9.173807144165039,
106
- "learning_rate": 3.252688172043011e-05,
107
- "loss": 0.3001,
108
- "step": 130
109
- },
110
- {
111
- "epoch": 1.120603015075377,
112
- "grad_norm": 3.8994596004486084,
113
- "learning_rate": 3.118279569892473e-05,
114
- "loss": 0.2407,
115
- "step": 140
116
- },
117
- {
118
- "epoch": 1.2010050251256281,
119
- "grad_norm": 4.364039421081543,
120
- "learning_rate": 2.9838709677419357e-05,
121
- "loss": 0.2059,
122
- "step": 150
123
- },
124
- {
125
- "epoch": 1.2814070351758793,
126
- "grad_norm": 13.414017677307129,
127
- "learning_rate": 2.8494623655913982e-05,
128
- "loss": 0.2356,
129
- "step": 160
130
- },
131
- {
132
- "epoch": 1.3618090452261307,
133
- "grad_norm": 11.49068832397461,
134
- "learning_rate": 2.71505376344086e-05,
135
- "loss": 0.3452,
136
- "step": 170
137
- },
138
- {
139
- "epoch": 1.442211055276382,
140
- "grad_norm": 7.965292453765869,
141
- "learning_rate": 2.5806451612903226e-05,
142
- "loss": 0.2722,
143
- "step": 180
144
- },
145
- {
146
- "epoch": 1.5226130653266332,
147
- "grad_norm": 2.0057058334350586,
148
- "learning_rate": 2.446236559139785e-05,
149
- "loss": 0.1858,
150
- "step": 190
151
- },
152
- {
153
- "epoch": 1.6030150753768844,
154
- "grad_norm": 3.195129632949829,
155
- "learning_rate": 2.3118279569892472e-05,
156
- "loss": 0.2449,
157
- "step": 200
158
- },
159
- {
160
- "epoch": 1.6834170854271355,
161
- "grad_norm": 4.746509075164795,
162
- "learning_rate": 2.1774193548387097e-05,
163
- "loss": 0.2339,
164
- "step": 210
165
- },
166
- {
167
- "epoch": 1.763819095477387,
168
- "grad_norm": 2.469165086746216,
169
- "learning_rate": 2.0430107526881722e-05,
170
- "loss": 0.1992,
171
- "step": 220
172
- },
173
- {
174
- "epoch": 1.8442211055276383,
175
- "grad_norm": 3.318335771560669,
176
- "learning_rate": 1.9086021505376344e-05,
177
- "loss": 0.2243,
178
- "step": 230
179
- },
180
- {
181
- "epoch": 1.9246231155778895,
182
- "grad_norm": 2.4900591373443604,
183
- "learning_rate": 1.774193548387097e-05,
184
- "loss": 0.2452,
185
- "step": 240
186
- },
187
- {
188
- "epoch": 2.0,
189
- "grad_norm": 1.6156350374221802,
190
- "learning_rate": 1.639784946236559e-05,
191
- "loss": 0.1968,
192
- "step": 250
193
- },
194
- {
195
- "epoch": 2.0,
196
- "eval_loss": 0.22428156435489655,
197
- "eval_runtime": 7.7355,
198
- "eval_samples_per_second": 257.256,
199
- "eval_steps_per_second": 32.189,
200
- "step": 250
201
- },
202
- {
203
- "epoch": 2.080402010050251,
204
- "grad_norm": 5.255651950836182,
205
- "learning_rate": 1.5053763440860215e-05,
206
- "loss": 0.1432,
207
- "step": 260
208
- },
209
- {
210
- "epoch": 2.1608040201005023,
211
- "grad_norm": 4.3175578117370605,
212
- "learning_rate": 1.3709677419354839e-05,
213
- "loss": 0.1143,
214
- "step": 270
215
- },
216
- {
217
- "epoch": 2.241206030150754,
218
- "grad_norm": 2.5253961086273193,
219
- "learning_rate": 1.2365591397849464e-05,
220
- "loss": 0.1325,
221
- "step": 280
222
- },
223
- {
224
- "epoch": 2.321608040201005,
225
- "grad_norm": 5.174256801605225,
226
- "learning_rate": 1.1021505376344087e-05,
227
- "loss": 0.116,
228
- "step": 290
229
- },
230
- {
231
- "epoch": 2.4020100502512562,
232
- "grad_norm": 3.690969228744507,
233
- "learning_rate": 9.67741935483871e-06,
234
- "loss": 0.0768,
235
- "step": 300
236
- },
237
- {
238
- "epoch": 2.4824120603015074,
239
- "grad_norm": 4.675231456756592,
240
- "learning_rate": 8.333333333333334e-06,
241
- "loss": 0.1081,
242
- "step": 310
243
- },
244
- {
245
- "epoch": 2.5628140703517586,
246
- "grad_norm": 5.262183666229248,
247
- "learning_rate": 6.989247311827957e-06,
248
- "loss": 0.1212,
249
- "step": 320
250
- },
251
- {
252
- "epoch": 2.64321608040201,
253
- "grad_norm": 3.3453903198242188,
254
- "learning_rate": 5.64516129032258e-06,
255
- "loss": 0.1103,
256
- "step": 330
257
- },
258
- {
259
- "epoch": 2.7236180904522613,
260
- "grad_norm": 3.3446080684661865,
261
- "learning_rate": 4.3010752688172045e-06,
262
- "loss": 0.1406,
263
- "step": 340
264
- },
265
- {
266
- "epoch": 2.8040201005025125,
267
- "grad_norm": 3.0553841590881348,
268
- "learning_rate": 2.9569892473118283e-06,
269
- "loss": 0.1042,
270
- "step": 350
271
- },
272
- {
273
- "epoch": 2.884422110552764,
274
- "grad_norm": 1.7012218236923218,
275
- "learning_rate": 1.6129032258064516e-06,
276
- "loss": 0.1145,
277
- "step": 360
278
- },
279
- {
280
- "epoch": 2.964824120603015,
281
- "grad_norm": 3.853684425354004,
282
- "learning_rate": 2.688172043010753e-07,
283
- "loss": 0.0893,
284
- "step": 370
285
- }
286
- ],
287
- "logging_steps": 10,
288
- "max_steps": 372,
289
- "num_input_tokens_seen": 0,
290
- "num_train_epochs": 3,
291
- "save_steps": 100,
292
- "stateful_callbacks": {
293
- "TrainerControl": {
294
- "args": {
295
- "should_epoch_stop": false,
296
- "should_evaluate": false,
297
- "should_log": false,
298
- "should_save": true,
299
- "should_training_stop": true
300
- },
301
- "attributes": {}
302
- }
303
- },
304
- "total_flos": 2539747126960128.0,
305
- "train_batch_size": 8,
306
- "trial_name": null,
307
- "trial_params": null
308
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-372/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed8f0e6064381a1811a4f562171b36052a99ad4099d891eec9fcb57aec61a8f2
3
- size 5368