ireema commited on
Commit
125b326
·
verified ·
1 Parent(s): 59e5797

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. optimizer.pt +3 -0
  2. rng_state.pth +3 -0
  3. scheduler.pt +3 -0
  4. trainer_state.json +449 -0
  5. training_args.bin +3 -0
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e95e33471399ccfc7bed85d400c59f70fd5463e4db899c05e6657a938717603f
3
+ size 2492275004
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ded95083a69212f90b065f310cf63b3552b500af09f54416ddbc1aba99d20a0
3
+ size 14308
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69dae4013c514d4dd5af226e2985d14d77eacee0977117596e76139a814ff8d0
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,449 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.28394800424575806,
3
+ "best_model_checkpoint": "/kaggle/working/wav2vec2-speech-emotion/checkpoint-1100",
4
+ "epoch": 12.64367816091954,
5
+ "eval_steps": 100,
6
+ "global_step": 1100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.28735632183908044,
13
+ "grad_norm": 81616.25,
14
+ "learning_rate": 2.5e-06,
15
+ "loss": 1.9364,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.5747126436781609,
20
+ "grad_norm": 113624.453125,
21
+ "learning_rate": 5e-06,
22
+ "loss": 1.9286,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.8620689655172413,
27
+ "grad_norm": 74490.7890625,
28
+ "learning_rate": 7.500000000000001e-06,
29
+ "loss": 1.9194,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 1.1494252873563218,
34
+ "grad_norm": 94447.8125,
35
+ "learning_rate": 1e-05,
36
+ "loss": 1.9086,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 1.1494252873563218,
41
+ "eval_accuracy": 0.2261904761904762,
42
+ "eval_loss": 1.9561063051223755,
43
+ "eval_runtime": 10.2339,
44
+ "eval_samples_per_second": 16.416,
45
+ "eval_steps_per_second": 1.075,
46
+ "step": 100
47
+ },
48
+ {
49
+ "epoch": 1.4367816091954024,
50
+ "grad_norm": 60535.6171875,
51
+ "learning_rate": 1.25e-05,
52
+ "loss": 1.8885,
53
+ "step": 125
54
+ },
55
+ {
56
+ "epoch": 1.7241379310344827,
57
+ "grad_norm": 118006.9765625,
58
+ "learning_rate": 1.5000000000000002e-05,
59
+ "loss": 1.8847,
60
+ "step": 150
61
+ },
62
+ {
63
+ "epoch": 2.0114942528735633,
64
+ "grad_norm": 109447.6953125,
65
+ "learning_rate": 1.7500000000000002e-05,
66
+ "loss": 1.8595,
67
+ "step": 175
68
+ },
69
+ {
70
+ "epoch": 2.2988505747126435,
71
+ "grad_norm": 131271.265625,
72
+ "learning_rate": 2e-05,
73
+ "loss": 1.7796,
74
+ "step": 200
75
+ },
76
+ {
77
+ "epoch": 2.2988505747126435,
78
+ "eval_accuracy": 0.42857142857142855,
79
+ "eval_loss": 1.737054705619812,
80
+ "eval_runtime": 10.3261,
81
+ "eval_samples_per_second": 16.269,
82
+ "eval_steps_per_second": 1.065,
83
+ "step": 200
84
+ },
85
+ {
86
+ "epoch": 2.586206896551724,
87
+ "grad_norm": 222429.421875,
88
+ "learning_rate": 1.9974751105436266e-05,
89
+ "loss": 1.6504,
90
+ "step": 225
91
+ },
92
+ {
93
+ "epoch": 2.873563218390805,
94
+ "grad_norm": 482113.40625,
95
+ "learning_rate": 1.98991319230804e-05,
96
+ "loss": 1.4559,
97
+ "step": 250
98
+ },
99
+ {
100
+ "epoch": 3.160919540229885,
101
+ "grad_norm": 375893.84375,
102
+ "learning_rate": 1.9773524313084857e-05,
103
+ "loss": 1.2633,
104
+ "step": 275
105
+ },
106
+ {
107
+ "epoch": 3.4482758620689653,
108
+ "grad_norm": 542423.5625,
109
+ "learning_rate": 1.959856256610988e-05,
110
+ "loss": 1.1442,
111
+ "step": 300
112
+ },
113
+ {
114
+ "epoch": 3.4482758620689653,
115
+ "eval_accuracy": 0.6726190476190477,
116
+ "eval_loss": 1.0434563159942627,
117
+ "eval_runtime": 10.2866,
118
+ "eval_samples_per_second": 16.332,
119
+ "eval_steps_per_second": 1.069,
120
+ "step": 300
121
+ },
122
+ {
123
+ "epoch": 3.735632183908046,
124
+ "grad_norm": 321237.03125,
125
+ "learning_rate": 1.937513020029588e-05,
126
+ "loss": 1.0287,
127
+ "step": 325
128
+ },
129
+ {
130
+ "epoch": 4.022988505747127,
131
+ "grad_norm": 494109.96875,
132
+ "learning_rate": 1.9104355499692166e-05,
133
+ "loss": 0.929,
134
+ "step": 350
135
+ },
136
+ {
137
+ "epoch": 4.310344827586207,
138
+ "grad_norm": 399620.6875,
139
+ "learning_rate": 1.8787605816671956e-05,
140
+ "loss": 0.809,
141
+ "step": 375
142
+ },
143
+ {
144
+ "epoch": 4.597701149425287,
145
+ "grad_norm": 320774.09375,
146
+ "learning_rate": 1.8426480667105178e-05,
147
+ "loss": 0.7284,
148
+ "step": 400
149
+ },
150
+ {
151
+ "epoch": 4.597701149425287,
152
+ "eval_accuracy": 0.7857142857142857,
153
+ "eval_loss": 0.8009253144264221,
154
+ "eval_runtime": 10.0989,
155
+ "eval_samples_per_second": 16.635,
156
+ "eval_steps_per_second": 1.089,
157
+ "step": 400
158
+ },
159
+ {
160
+ "epoch": 4.885057471264368,
161
+ "grad_norm": 316548.5625,
162
+ "learning_rate": 1.8022803653156983e-05,
163
+ "loss": 0.7361,
164
+ "step": 425
165
+ },
166
+ {
167
+ "epoch": 5.172413793103448,
168
+ "grad_norm": 592323.0,
169
+ "learning_rate": 1.757861325449997e-05,
170
+ "loss": 0.6226,
171
+ "step": 450
172
+ },
173
+ {
174
+ "epoch": 5.459770114942529,
175
+ "grad_norm": 686948.0625,
176
+ "learning_rate": 1.7096152534442515e-05,
177
+ "loss": 0.5987,
178
+ "step": 475
179
+ },
180
+ {
181
+ "epoch": 5.747126436781609,
182
+ "grad_norm": 314310.65625,
183
+ "learning_rate": 1.6577857812954994e-05,
184
+ "loss": 0.5259,
185
+ "step": 500
186
+ },
187
+ {
188
+ "epoch": 5.747126436781609,
189
+ "eval_accuracy": 0.8511904761904762,
190
+ "eval_loss": 0.5674353837966919,
191
+ "eval_runtime": 10.2098,
192
+ "eval_samples_per_second": 16.455,
193
+ "eval_steps_per_second": 1.077,
194
+ "step": 500
195
+ },
196
+ {
197
+ "epoch": 6.0344827586206895,
198
+ "grad_norm": 747515.625,
199
+ "learning_rate": 1.6026346363792565e-05,
200
+ "loss": 0.534,
201
+ "step": 525
202
+ },
203
+ {
204
+ "epoch": 6.32183908045977,
205
+ "grad_norm": 308987.9375,
206
+ "learning_rate": 1.5444403197841345e-05,
207
+ "loss": 0.448,
208
+ "step": 550
209
+ },
210
+ {
211
+ "epoch": 6.609195402298851,
212
+ "grad_norm": 1300700.75,
213
+ "learning_rate": 1.4834966999429179e-05,
214
+ "loss": 0.439,
215
+ "step": 575
216
+ },
217
+ {
218
+ "epoch": 6.896551724137931,
219
+ "grad_norm": 359445.15625,
220
+ "learning_rate": 1.4201115286619464e-05,
221
+ "loss": 0.3734,
222
+ "step": 600
223
+ },
224
+ {
225
+ "epoch": 6.896551724137931,
226
+ "eval_accuracy": 0.8273809523809523,
227
+ "eval_loss": 0.6119872331619263,
228
+ "eval_runtime": 10.1904,
229
+ "eval_samples_per_second": 16.486,
230
+ "eval_steps_per_second": 1.079,
231
+ "step": 600
232
+ },
233
+ {
234
+ "epoch": 7.183908045977011,
235
+ "grad_norm": 156436.734375,
236
+ "learning_rate": 1.3546048870425356e-05,
237
+ "loss": 0.331,
238
+ "step": 625
239
+ },
240
+ {
241
+ "epoch": 7.471264367816092,
242
+ "grad_norm": 920202.625,
243
+ "learning_rate": 1.2873075691421808e-05,
244
+ "loss": 0.3074,
245
+ "step": 650
246
+ },
247
+ {
248
+ "epoch": 7.758620689655173,
249
+ "grad_norm": 501704.25,
250
+ "learning_rate": 1.2185594115376991e-05,
251
+ "loss": 0.2982,
252
+ "step": 675
253
+ },
254
+ {
255
+ "epoch": 8.045977011494253,
256
+ "grad_norm": 210118.0625,
257
+ "learning_rate": 1.1487075772256517e-05,
258
+ "loss": 0.2702,
259
+ "step": 700
260
+ },
261
+ {
262
+ "epoch": 8.045977011494253,
263
+ "eval_accuracy": 0.9464285714285714,
264
+ "eval_loss": 0.2987803518772125,
265
+ "eval_runtime": 10.1874,
266
+ "eval_samples_per_second": 16.491,
267
+ "eval_steps_per_second": 1.08,
268
+ "step": 700
269
+ },
270
+ {
271
+ "epoch": 8.333333333333334,
272
+ "grad_norm": 529805.875,
273
+ "learning_rate": 1.0781048025259648e-05,
274
+ "loss": 0.2609,
275
+ "step": 725
276
+ },
277
+ {
278
+ "epoch": 8.620689655172415,
279
+ "grad_norm": 280091.625,
280
+ "learning_rate": 1.0071076158414977e-05,
281
+ "loss": 0.2612,
282
+ "step": 750
283
+ },
284
+ {
285
+ "epoch": 8.908045977011493,
286
+ "grad_norm": 457920.8125,
287
+ "learning_rate": 9.360745372684346e-06,
288
+ "loss": 0.2353,
289
+ "step": 775
290
+ },
291
+ {
292
+ "epoch": 9.195402298850574,
293
+ "grad_norm": 791098.25,
294
+ "learning_rate": 8.653642681490608e-06,
295
+ "loss": 0.2171,
296
+ "step": 800
297
+ },
298
+ {
299
+ "epoch": 9.195402298850574,
300
+ "eval_accuracy": 0.8928571428571429,
301
+ "eval_loss": 0.34595435857772827,
302
+ "eval_runtime": 10.2959,
303
+ "eval_samples_per_second": 16.317,
304
+ "eval_steps_per_second": 1.068,
305
+ "step": 800
306
+ },
307
+ {
308
+ "epoch": 9.482758620689655,
309
+ "grad_norm": 490233.0,
310
+ "learning_rate": 7.953338797092902e-06,
311
+ "loss": 0.1884,
312
+ "step": 825
313
+ },
314
+ {
315
+ "epoch": 9.770114942528735,
316
+ "grad_norm": 605037.8125,
317
+ "learning_rate": 7.263370099279173e-06,
318
+ "loss": 0.1573,
319
+ "step": 850
320
+ },
321
+ {
322
+ "epoch": 10.057471264367816,
323
+ "grad_norm": 504736.34375,
324
+ "learning_rate": 6.587220777430097e-06,
325
+ "loss": 0.1424,
326
+ "step": 875
327
+ },
328
+ {
329
+ "epoch": 10.344827586206897,
330
+ "grad_norm": 559532.25,
331
+ "learning_rate": 5.928305236133016e-06,
332
+ "loss": 0.1634,
333
+ "step": 900
334
+ },
335
+ {
336
+ "epoch": 10.344827586206897,
337
+ "eval_accuracy": 0.9345238095238095,
338
+ "eval_loss": 0.29700732231140137,
339
+ "eval_runtime": 10.0813,
340
+ "eval_samples_per_second": 16.665,
341
+ "eval_steps_per_second": 1.091,
342
+ "step": 900
343
+ },
344
+ {
345
+ "epoch": 10.632183908045977,
346
+ "grad_norm": 426639.75,
347
+ "learning_rate": 5.2899508531936526e-06,
348
+ "loss": 0.1735,
349
+ "step": 925
350
+ },
351
+ {
352
+ "epoch": 10.919540229885058,
353
+ "grad_norm": 421293.25,
354
+ "learning_rate": 4.675381177113837e-06,
355
+ "loss": 0.2091,
356
+ "step": 950
357
+ },
358
+ {
359
+ "epoch": 11.206896551724139,
360
+ "grad_norm": 1287860.25,
361
+ "learning_rate": 4.087699648884248e-06,
362
+ "loss": 0.1832,
363
+ "step": 975
364
+ },
365
+ {
366
+ "epoch": 11.494252873563218,
367
+ "grad_norm": 596496.875,
368
+ "learning_rate": 3.529873930293546e-06,
369
+ "loss": 0.1483,
370
+ "step": 1000
371
+ },
372
+ {
373
+ "epoch": 11.494252873563218,
374
+ "eval_accuracy": 0.9226190476190477,
375
+ "eval_loss": 0.3032456934452057,
376
+ "eval_runtime": 10.027,
377
+ "eval_samples_per_second": 16.755,
378
+ "eval_steps_per_second": 1.097,
379
+ "step": 1000
380
+ },
381
+ {
382
+ "epoch": 11.781609195402298,
383
+ "grad_norm": 223663.953125,
384
+ "learning_rate": 3.004720917892464e-06,
385
+ "loss": 0.1588,
386
+ "step": 1025
387
+ },
388
+ {
389
+ "epoch": 12.068965517241379,
390
+ "grad_norm": 55755.37109375,
391
+ "learning_rate": 2.514892518288988e-06,
392
+ "loss": 0.1392,
393
+ "step": 1050
394
+ },
395
+ {
396
+ "epoch": 12.35632183908046,
397
+ "grad_norm": 260650.140625,
398
+ "learning_rate": 2.0628622566063063e-06,
399
+ "loss": 0.1493,
400
+ "step": 1075
401
+ },
402
+ {
403
+ "epoch": 12.64367816091954,
404
+ "grad_norm": 60438.0390625,
405
+ "learning_rate": 1.6509127857277784e-06,
406
+ "loss": 0.1077,
407
+ "step": 1100
408
+ },
409
+ {
410
+ "epoch": 12.64367816091954,
411
+ "eval_accuracy": 0.9166666666666666,
412
+ "eval_loss": 0.28394800424575806,
413
+ "eval_runtime": 10.0833,
414
+ "eval_samples_per_second": 16.661,
415
+ "eval_steps_per_second": 1.091,
416
+ "step": 1100
417
+ }
418
+ ],
419
+ "logging_steps": 25,
420
+ "max_steps": 1305,
421
+ "num_input_tokens_seen": 0,
422
+ "num_train_epochs": 15,
423
+ "save_steps": 100,
424
+ "stateful_callbacks": {
425
+ "EarlyStoppingCallback": {
426
+ "args": {
427
+ "early_stopping_patience": 3,
428
+ "early_stopping_threshold": 0.0
429
+ },
430
+ "attributes": {
431
+ "early_stopping_patience_counter": 0
432
+ }
433
+ },
434
+ "TrainerControl": {
435
+ "args": {
436
+ "should_epoch_stop": false,
437
+ "should_evaluate": false,
438
+ "should_log": false,
439
+ "should_save": true,
440
+ "should_training_stop": false
441
+ },
442
+ "attributes": {}
443
+ }
444
+ },
445
+ "total_flos": 4.682951691910107e+18,
446
+ "train_batch_size": 16,
447
+ "trial_name": null,
448
+ "trial_params": null
449
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:083d3480b5664c2448ad1338e2bc8728268febcfbec1253ec2c060e4c6628bf1
3
+ size 5432