euler03 commited on
Commit
7f9faec
·
verified ·
1 Parent(s): 88eccd7

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "pad_token_id": 0,
17
+ "problem_type": "single_label_classification",
18
+ "qa_dropout": 0.1,
19
+ "seq_classif_dropout": 0.2,
20
+ "sinusoidal_pos_embds": false,
21
+ "tie_weights_": true,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.49.0",
24
+ "vocab_size": 30522
25
+ }
last-checkpoint/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a10a2ecaff875b9c46ad2bbd2fed17c2a0a46c72399b0499d9bca795a82b01a
3
+ size 267832560
last-checkpoint/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ffdf6481862c29fc9f519ed97553ae9c619649345ac1473ff2b63f00a952157
3
+ size 535727290
last-checkpoint/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e4a426a8b73c74a38f2a3b1243f7c773cf4681b425c8731e354a12e8672e330
3
+ size 14244
last-checkpoint/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cbe50d058b46466dc5c0d3a5f85c97b4ca24f57c286062ca922883cd2d25c9c
3
+ size 1064
last-checkpoint/trainer_state.json ADDED
@@ -0,0 +1,405 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.0,
3
+ "best_model_checkpoint": "./results/checkpoint-500",
4
+ "epoch": 0.17094017094017094,
5
+ "eval_steps": 500,
6
+ "global_step": 500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.003418803418803419,
13
+ "grad_norm": 2.3258378505706787,
14
+ "learning_rate": 4.9943019943019945e-05,
15
+ "loss": 0.6681,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.006837606837606838,
20
+ "grad_norm": 0.7698261737823486,
21
+ "learning_rate": 4.988603988603989e-05,
22
+ "loss": 0.6451,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.010256410256410256,
27
+ "grad_norm": 1.6664257049560547,
28
+ "learning_rate": 4.982905982905983e-05,
29
+ "loss": 0.6485,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.013675213675213675,
34
+ "grad_norm": 0.6200563907623291,
35
+ "learning_rate": 4.9772079772079774e-05,
36
+ "loss": 0.6453,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.017094017094017096,
41
+ "grad_norm": 0.5258885622024536,
42
+ "learning_rate": 4.971509971509972e-05,
43
+ "loss": 0.6569,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.020512820512820513,
48
+ "grad_norm": 0.5715610384941101,
49
+ "learning_rate": 4.965811965811966e-05,
50
+ "loss": 0.6508,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.023931623931623933,
55
+ "grad_norm": 0.5744765400886536,
56
+ "learning_rate": 4.96011396011396e-05,
57
+ "loss": 0.6029,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.02735042735042735,
62
+ "grad_norm": 0.9320403337478638,
63
+ "learning_rate": 4.9544159544159546e-05,
64
+ "loss": 0.6644,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.03076923076923077,
69
+ "grad_norm": 0.5994309186935425,
70
+ "learning_rate": 4.948717948717949e-05,
71
+ "loss": 0.6757,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.03418803418803419,
76
+ "grad_norm": 0.4685361385345459,
77
+ "learning_rate": 4.943019943019943e-05,
78
+ "loss": 0.6372,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.037606837606837605,
83
+ "grad_norm": 0.6897755265235901,
84
+ "learning_rate": 4.9373219373219375e-05,
85
+ "loss": 0.6395,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.041025641025641026,
90
+ "grad_norm": 0.5714218616485596,
91
+ "learning_rate": 4.931623931623932e-05,
92
+ "loss": 0.6323,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.044444444444444446,
97
+ "grad_norm": 0.6862583160400391,
98
+ "learning_rate": 4.925925925925926e-05,
99
+ "loss": 0.6307,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.04786324786324787,
104
+ "grad_norm": 1.1985986232757568,
105
+ "learning_rate": 4.9202279202279204e-05,
106
+ "loss": 0.6353,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.05128205128205128,
111
+ "grad_norm": 0.4656996428966522,
112
+ "learning_rate": 4.9145299145299147e-05,
113
+ "loss": 0.6552,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.0547008547008547,
118
+ "grad_norm": 1.3551446199417114,
119
+ "learning_rate": 4.908831908831909e-05,
120
+ "loss": 0.6484,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.05811965811965812,
125
+ "grad_norm": 1.137487769126892,
126
+ "learning_rate": 4.903133903133903e-05,
127
+ "loss": 0.5905,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.06153846153846154,
132
+ "grad_norm": 0.6064645051956177,
133
+ "learning_rate": 4.8974358974358975e-05,
134
+ "loss": 0.6157,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.06495726495726496,
139
+ "grad_norm": 2.0975794792175293,
140
+ "learning_rate": 4.891737891737892e-05,
141
+ "loss": 0.6701,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.06837606837606838,
146
+ "grad_norm": 0.48940032720565796,
147
+ "learning_rate": 4.886039886039887e-05,
148
+ "loss": 0.6342,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.07179487179487179,
153
+ "grad_norm": 1.2511190176010132,
154
+ "learning_rate": 4.8803418803418804e-05,
155
+ "loss": 0.6521,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.07521367521367521,
160
+ "grad_norm": 0.7074885964393616,
161
+ "learning_rate": 4.874643874643875e-05,
162
+ "loss": 0.6548,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.07863247863247863,
167
+ "grad_norm": 1.152065396308899,
168
+ "learning_rate": 4.868945868945869e-05,
169
+ "loss": 0.6589,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.08205128205128205,
174
+ "grad_norm": 0.39897221326828003,
175
+ "learning_rate": 4.863247863247863e-05,
176
+ "loss": 0.6595,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.08547008547008547,
181
+ "grad_norm": 0.5259735584259033,
182
+ "learning_rate": 4.8575498575498576e-05,
183
+ "loss": 0.665,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.08888888888888889,
188
+ "grad_norm": 0.5097119808197021,
189
+ "learning_rate": 4.851851851851852e-05,
190
+ "loss": 0.6498,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.09230769230769231,
195
+ "grad_norm": 0.48037877678871155,
196
+ "learning_rate": 4.846153846153846e-05,
197
+ "loss": 0.5882,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.09572649572649573,
202
+ "grad_norm": 0.6850088834762573,
203
+ "learning_rate": 4.840455840455841e-05,
204
+ "loss": 0.6329,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 0.09914529914529914,
209
+ "grad_norm": 0.6092679500579834,
210
+ "learning_rate": 4.834757834757835e-05,
211
+ "loss": 0.6246,
212
+ "step": 290
213
+ },
214
+ {
215
+ "epoch": 0.10256410256410256,
216
+ "grad_norm": 1.0922237634658813,
217
+ "learning_rate": 4.829059829059829e-05,
218
+ "loss": 0.6144,
219
+ "step": 300
220
+ },
221
+ {
222
+ "epoch": 0.10598290598290598,
223
+ "grad_norm": 1.4150214195251465,
224
+ "learning_rate": 4.823361823361824e-05,
225
+ "loss": 0.643,
226
+ "step": 310
227
+ },
228
+ {
229
+ "epoch": 0.1094017094017094,
230
+ "grad_norm": 1.516169548034668,
231
+ "learning_rate": 4.817663817663818e-05,
232
+ "loss": 0.6046,
233
+ "step": 320
234
+ },
235
+ {
236
+ "epoch": 0.11282051282051282,
237
+ "grad_norm": 0.5234593749046326,
238
+ "learning_rate": 4.8119658119658126e-05,
239
+ "loss": 0.6193,
240
+ "step": 330
241
+ },
242
+ {
243
+ "epoch": 0.11623931623931624,
244
+ "grad_norm": 0.6485182046890259,
245
+ "learning_rate": 4.806267806267806e-05,
246
+ "loss": 0.6314,
247
+ "step": 340
248
+ },
249
+ {
250
+ "epoch": 0.11965811965811966,
251
+ "grad_norm": 0.9457536935806274,
252
+ "learning_rate": 4.8005698005698006e-05,
253
+ "loss": 0.5802,
254
+ "step": 350
255
+ },
256
+ {
257
+ "epoch": 0.12307692307692308,
258
+ "grad_norm": 1.2444144487380981,
259
+ "learning_rate": 4.7948717948717955e-05,
260
+ "loss": 0.5927,
261
+ "step": 360
262
+ },
263
+ {
264
+ "epoch": 0.1264957264957265,
265
+ "grad_norm": 0.499647855758667,
266
+ "learning_rate": 4.789173789173789e-05,
267
+ "loss": 0.6358,
268
+ "step": 370
269
+ },
270
+ {
271
+ "epoch": 0.12991452991452992,
272
+ "grad_norm": 2.130183696746826,
273
+ "learning_rate": 4.7834757834757834e-05,
274
+ "loss": 0.6324,
275
+ "step": 380
276
+ },
277
+ {
278
+ "epoch": 0.13333333333333333,
279
+ "grad_norm": 0.6378350257873535,
280
+ "learning_rate": 4.7777777777777784e-05,
281
+ "loss": 0.6061,
282
+ "step": 390
283
+ },
284
+ {
285
+ "epoch": 0.13675213675213677,
286
+ "grad_norm": 0.39135029911994934,
287
+ "learning_rate": 4.772079772079772e-05,
288
+ "loss": 0.6329,
289
+ "step": 400
290
+ },
291
+ {
292
+ "epoch": 0.14017094017094017,
293
+ "grad_norm": 0.5480381846427917,
294
+ "learning_rate": 4.766381766381767e-05,
295
+ "loss": 0.6607,
296
+ "step": 410
297
+ },
298
+ {
299
+ "epoch": 0.14358974358974358,
300
+ "grad_norm": 0.4431852400302887,
301
+ "learning_rate": 4.7606837606837606e-05,
302
+ "loss": 0.6233,
303
+ "step": 420
304
+ },
305
+ {
306
+ "epoch": 0.147008547008547,
307
+ "grad_norm": 0.4828330874443054,
308
+ "learning_rate": 4.754985754985755e-05,
309
+ "loss": 0.6437,
310
+ "step": 430
311
+ },
312
+ {
313
+ "epoch": 0.15042735042735042,
314
+ "grad_norm": 0.5272857546806335,
315
+ "learning_rate": 4.74928774928775e-05,
316
+ "loss": 0.6671,
317
+ "step": 440
318
+ },
319
+ {
320
+ "epoch": 0.15384615384615385,
321
+ "grad_norm": 1.4251387119293213,
322
+ "learning_rate": 4.7435897435897435e-05,
323
+ "loss": 0.658,
324
+ "step": 450
325
+ },
326
+ {
327
+ "epoch": 0.15726495726495726,
328
+ "grad_norm": 0.8041712641716003,
329
+ "learning_rate": 4.737891737891738e-05,
330
+ "loss": 0.6487,
331
+ "step": 460
332
+ },
333
+ {
334
+ "epoch": 0.1606837606837607,
335
+ "grad_norm": 0.7019796371459961,
336
+ "learning_rate": 4.732193732193733e-05,
337
+ "loss": 0.6019,
338
+ "step": 470
339
+ },
340
+ {
341
+ "epoch": 0.1641025641025641,
342
+ "grad_norm": 0.8561422228813171,
343
+ "learning_rate": 4.7264957264957264e-05,
344
+ "loss": 0.6897,
345
+ "step": 480
346
+ },
347
+ {
348
+ "epoch": 0.1675213675213675,
349
+ "grad_norm": 1.0677204132080078,
350
+ "learning_rate": 4.7207977207977214e-05,
351
+ "loss": 0.6848,
352
+ "step": 490
353
+ },
354
+ {
355
+ "epoch": 0.17094017094017094,
356
+ "grad_norm": 0.4762294590473175,
357
+ "learning_rate": 4.7150997150997157e-05,
358
+ "loss": 0.6527,
359
+ "step": 500
360
+ },
361
+ {
362
+ "epoch": 0.17094017094017094,
363
+ "eval_accuracy": 0.661082143772972,
364
+ "eval_f1": 0.0,
365
+ "eval_loss": 0.6433083415031433,
366
+ "eval_precision": 0.0,
367
+ "eval_recall": 0.0,
368
+ "eval_roc_auc": 0.4981741909669265,
369
+ "eval_runtime": 36.622,
370
+ "eval_samples_per_second": 319.453,
371
+ "eval_steps_per_second": 19.988,
372
+ "step": 500
373
+ }
374
+ ],
375
+ "logging_steps": 10,
376
+ "max_steps": 8775,
377
+ "num_input_tokens_seen": 0,
378
+ "num_train_epochs": 3,
379
+ "save_steps": 500,
380
+ "stateful_callbacks": {
381
+ "EarlyStoppingCallback": {
382
+ "args": {
383
+ "early_stopping_patience": 3,
384
+ "early_stopping_threshold": 0.001
385
+ },
386
+ "attributes": {
387
+ "early_stopping_patience_counter": 0
388
+ }
389
+ },
390
+ "TrainerControl": {
391
+ "args": {
392
+ "should_epoch_stop": false,
393
+ "should_evaluate": false,
394
+ "should_log": false,
395
+ "should_save": true,
396
+ "should_training_stop": false
397
+ },
398
+ "attributes": {}
399
+ }
400
+ },
401
+ "total_flos": 264934797312000.0,
402
+ "train_batch_size": 16,
403
+ "trial_name": null,
404
+ "trial_params": null
405
+ }
last-checkpoint/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2be5ea5a8355405fdb4a1fa2f56c3eec77a53269f31e4155f897571249d4091b
3
+ size 5368