deepthink8 committed on
Commit
5157cdd
·
verified ·
1 Parent(s): 22ca917

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/config.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "adapter_attn_dim": null,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForSequenceClassification"
10
+ ],
11
+ "attention_dropout": 0.1,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 256,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": false,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "sum",
45
+ "ctc_zero_infinity": false,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": false,
48
+ "dtype": "float32",
49
+ "eos_token_id": 2,
50
+ "feat_extract_activation": "gelu",
51
+ "feat_extract_norm": "group",
52
+ "feat_proj_dropout": 0.1,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "freeze_feat_extract_train": true,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.1,
58
+ "hidden_size": 768,
59
+ "id2label": {
60
+ "0": "fake",
61
+ "1": "real"
62
+ },
63
+ "initializer_range": 0.02,
64
+ "intermediate_size": 3072,
65
+ "label2id": {
66
+ "fake": "0",
67
+ "real": "1"
68
+ },
69
+ "layer_norm_eps": 1e-05,
70
+ "layerdrop": 0.0,
71
+ "mask_channel_length": 10,
72
+ "mask_channel_min_space": 1,
73
+ "mask_channel_other": 0.0,
74
+ "mask_channel_prob": 0.0,
75
+ "mask_channel_selection": "static",
76
+ "mask_feature_length": 10,
77
+ "mask_feature_min_masks": 0,
78
+ "mask_feature_prob": 0.0,
79
+ "mask_time_length": 10,
80
+ "mask_time_min_masks": 2,
81
+ "mask_time_min_space": 1,
82
+ "mask_time_other": 0.0,
83
+ "mask_time_prob": 0.05,
84
+ "mask_time_selection": "static",
85
+ "model_type": "wav2vec2",
86
+ "no_mask_channel_overlap": false,
87
+ "no_mask_time_overlap": false,
88
+ "num_adapter_layers": 3,
89
+ "num_attention_heads": 12,
90
+ "num_codevector_groups": 2,
91
+ "num_codevectors_per_group": 320,
92
+ "num_conv_pos_embedding_groups": 16,
93
+ "num_conv_pos_embeddings": 128,
94
+ "num_feat_extract_layers": 7,
95
+ "num_hidden_layers": 12,
96
+ "num_negatives": 100,
97
+ "output_hidden_size": 768,
98
+ "pad_token_id": 0,
99
+ "proj_codevector_dim": 256,
100
+ "tdnn_dilation": [
101
+ 1,
102
+ 2,
103
+ 3,
104
+ 1,
105
+ 1
106
+ ],
107
+ "tdnn_dim": [
108
+ 512,
109
+ 512,
110
+ 512,
111
+ 512,
112
+ 1500
113
+ ],
114
+ "tdnn_kernel": [
115
+ 5,
116
+ 3,
117
+ 3,
118
+ 1,
119
+ 1
120
+ ],
121
+ "transformers_version": "4.57.3",
122
+ "use_weighted_layer_sum": false,
123
+ "vocab_size": 32,
124
+ "xvector_output_dim": 512
125
+ }
last-checkpoint/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61f046095e067203694eb7ac3b56101f802c222508ff36c0e8a4fbc1f72d4125
3
+ size 378302360
last-checkpoint/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3789a0f05d3e97624e349985237e00b70832481cd75b893e1d50304972007da2
3
+ size 756732602
last-checkpoint/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": false,
8
+ "sampling_rate": 16000
9
+ }
last-checkpoint/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca69c562ea24f9a8a4e249ce778e6aa3dd1db2c84993f5499e5f9500d2edad8e
3
+ size 14308
last-checkpoint/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5699a0932df3dd97f15fcfcceb2d361d521d7117e842cbdb8f8aaaa5ca19c383
3
+ size 1064
last-checkpoint/trainer_state.json ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 496,
3
+ "best_metric": 0.5052976791120081,
4
+ "best_model_checkpoint": "Deeepfake-audio-Recognition/checkpoint-496",
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 496,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.020181634712411706,
14
+ "grad_norm": 0.6100882291793823,
15
+ "learning_rate": 5.399999999999999e-05,
16
+ "loss": 0.6925,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.04036326942482341,
21
+ "grad_norm": 4.445184230804443,
22
+ "learning_rate": 0.00011399999999999999,
23
+ "loss": 0.6948,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.060544904137235116,
28
+ "grad_norm": 2.134990692138672,
29
+ "learning_rate": 0.00017399999999999997,
30
+ "loss": 0.6859,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.08072653884964683,
35
+ "grad_norm": 0.7902814149856567,
36
+ "learning_rate": 0.000234,
37
+ "loss": 0.7172,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.10090817356205853,
42
+ "grad_norm": 0.5187829732894897,
43
+ "learning_rate": 0.000294,
44
+ "loss": 0.698,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.12108980827447023,
49
+ "grad_norm": 0.06201840192079544,
50
+ "learning_rate": 0.00029394618834080715,
51
+ "loss": 0.6939,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.14127144298688193,
56
+ "grad_norm": 0.3845376968383789,
57
+ "learning_rate": 0.000287219730941704,
58
+ "loss": 0.6893,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.16145307769929365,
63
+ "grad_norm": 0.10666169226169586,
64
+ "learning_rate": 0.0002804932735426009,
65
+ "loss": 0.6989,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.18163471241170534,
70
+ "grad_norm": 0.0453961119055748,
71
+ "learning_rate": 0.0002737668161434977,
72
+ "loss": 0.6962,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.20181634712411706,
77
+ "grad_norm": 0.3139711320400238,
78
+ "learning_rate": 0.0002670403587443946,
79
+ "loss": 0.6969,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.22199798183652875,
84
+ "grad_norm": 0.08844541013240814,
85
+ "learning_rate": 0.00026031390134529145,
86
+ "loss": 0.6895,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.24217961654894046,
91
+ "grad_norm": 0.21342194080352783,
92
+ "learning_rate": 0.0002535874439461883,
93
+ "loss": 0.6964,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.2623612512613522,
98
+ "grad_norm": 0.21850763261318207,
99
+ "learning_rate": 0.0002468609865470852,
100
+ "loss": 0.6946,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.28254288597376387,
105
+ "grad_norm": 0.3530184328556061,
106
+ "learning_rate": 0.00024013452914798202,
107
+ "loss": 0.6928,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.30272452068617556,
112
+ "grad_norm": 0.11930827051401138,
113
+ "learning_rate": 0.00023340807174887892,
114
+ "loss": 0.6951,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 0.3229061553985873,
119
+ "grad_norm": 0.25173401832580566,
120
+ "learning_rate": 0.00022668161434977576,
121
+ "loss": 0.6934,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 0.343087790110999,
126
+ "grad_norm": 0.25978365540504456,
127
+ "learning_rate": 0.00021995515695067262,
128
+ "loss": 0.6947,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 0.3632694248234107,
133
+ "grad_norm": 0.07770789414644241,
134
+ "learning_rate": 0.00021322869955156952,
135
+ "loss": 0.6936,
136
+ "step": 180
137
+ },
138
+ {
139
+ "epoch": 0.3834510595358224,
140
+ "grad_norm": 0.01727680303156376,
141
+ "learning_rate": 0.00020650224215246636,
142
+ "loss": 0.6939,
143
+ "step": 190
144
+ },
145
+ {
146
+ "epoch": 0.4036326942482341,
147
+ "grad_norm": 0.11968690156936646,
148
+ "learning_rate": 0.00019977578475336322,
149
+ "loss": 0.6938,
150
+ "step": 200
151
+ },
152
+ {
153
+ "epoch": 0.4238143289606458,
154
+ "grad_norm": 0.21573954820632935,
155
+ "learning_rate": 0.00019304932735426006,
156
+ "loss": 0.6891,
157
+ "step": 210
158
+ },
159
+ {
160
+ "epoch": 0.4439959636730575,
161
+ "grad_norm": 0.2965114414691925,
162
+ "learning_rate": 0.00018632286995515693,
163
+ "loss": 0.7002,
164
+ "step": 220
165
+ },
166
+ {
167
+ "epoch": 0.46417759838546924,
168
+ "grad_norm": 0.21291188895702362,
169
+ "learning_rate": 0.0001795964125560538,
170
+ "loss": 0.6933,
171
+ "step": 230
172
+ },
173
+ {
174
+ "epoch": 0.4843592330978809,
175
+ "grad_norm": 0.19937728345394135,
176
+ "learning_rate": 0.00017286995515695066,
177
+ "loss": 0.693,
178
+ "step": 240
179
+ },
180
+ {
181
+ "epoch": 0.5045408678102926,
182
+ "grad_norm": 0.13156402111053467,
183
+ "learning_rate": 0.00016614349775784752,
184
+ "loss": 0.6934,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 0.5247225025227044,
189
+ "grad_norm": 0.521071195602417,
190
+ "learning_rate": 0.00015941704035874436,
191
+ "loss": 0.6951,
192
+ "step": 260
193
+ },
194
+ {
195
+ "epoch": 0.544904137235116,
196
+ "grad_norm": 0.09291291236877441,
197
+ "learning_rate": 0.00015269058295964126,
198
+ "loss": 0.6924,
199
+ "step": 270
200
+ },
201
+ {
202
+ "epoch": 0.5650857719475277,
203
+ "grad_norm": 0.023215485736727715,
204
+ "learning_rate": 0.0001459641255605381,
205
+ "loss": 0.6983,
206
+ "step": 280
207
+ },
208
+ {
209
+ "epoch": 0.5852674066599395,
210
+ "grad_norm": 0.3948315978050232,
211
+ "learning_rate": 0.00013923766816143496,
212
+ "loss": 0.6927,
213
+ "step": 290
214
+ },
215
+ {
216
+ "epoch": 0.6054490413723511,
217
+ "grad_norm": 0.20814433693885803,
218
+ "learning_rate": 0.00013251121076233183,
219
+ "loss": 0.6944,
220
+ "step": 300
221
+ },
222
+ {
223
+ "epoch": 0.6256306760847629,
224
+ "grad_norm": 0.47286856174468994,
225
+ "learning_rate": 0.0001257847533632287,
226
+ "loss": 0.6932,
227
+ "step": 310
228
+ },
229
+ {
230
+ "epoch": 0.6458123107971746,
231
+ "grad_norm": 0.19758112728595734,
232
+ "learning_rate": 0.00011905829596412555,
233
+ "loss": 0.6933,
234
+ "step": 320
235
+ },
236
+ {
237
+ "epoch": 0.6659939455095862,
238
+ "grad_norm": 0.20121727883815765,
239
+ "learning_rate": 0.00011233183856502243,
240
+ "loss": 0.6933,
241
+ "step": 330
242
+ },
243
+ {
244
+ "epoch": 0.686175580221998,
245
+ "grad_norm": 0.02322172187268734,
246
+ "learning_rate": 0.00010560538116591928,
247
+ "loss": 0.6929,
248
+ "step": 340
249
+ },
250
+ {
251
+ "epoch": 0.7063572149344097,
252
+ "grad_norm": 0.10014554113149643,
253
+ "learning_rate": 9.887892376681613e-05,
254
+ "loss": 0.6923,
255
+ "step": 350
256
+ },
257
+ {
258
+ "epoch": 0.7265388496468214,
259
+ "grad_norm": 0.09777415543794632,
260
+ "learning_rate": 9.2152466367713e-05,
261
+ "loss": 0.6925,
262
+ "step": 360
263
+ },
264
+ {
265
+ "epoch": 0.7467204843592331,
266
+ "grad_norm": 0.1630220115184784,
267
+ "learning_rate": 8.542600896860985e-05,
268
+ "loss": 0.6947,
269
+ "step": 370
270
+ },
271
+ {
272
+ "epoch": 0.7669021190716448,
273
+ "grad_norm": 0.10113658010959625,
274
+ "learning_rate": 7.869955156950672e-05,
275
+ "loss": 0.6911,
276
+ "step": 380
277
+ },
278
+ {
279
+ "epoch": 0.7870837537840565,
280
+ "grad_norm": 0.23644621670246124,
281
+ "learning_rate": 7.197309417040358e-05,
282
+ "loss": 0.6949,
283
+ "step": 390
284
+ },
285
+ {
286
+ "epoch": 0.8072653884964682,
287
+ "grad_norm": 0.0247635617852211,
288
+ "learning_rate": 6.524663677130043e-05,
289
+ "loss": 0.6928,
290
+ "step": 400
291
+ },
292
+ {
293
+ "epoch": 0.82744702320888,
294
+ "grad_norm": 0.04597044363617897,
295
+ "learning_rate": 5.852017937219731e-05,
296
+ "loss": 0.6931,
297
+ "step": 410
298
+ },
299
+ {
300
+ "epoch": 0.8476286579212916,
301
+ "grad_norm": 0.10972382128238678,
302
+ "learning_rate": 5.1793721973094166e-05,
303
+ "loss": 0.6922,
304
+ "step": 420
305
+ },
306
+ {
307
+ "epoch": 0.8678102926337034,
308
+ "grad_norm": 0.026838891208171844,
309
+ "learning_rate": 4.5067264573991026e-05,
310
+ "loss": 0.6933,
311
+ "step": 430
312
+ },
313
+ {
314
+ "epoch": 0.887991927346115,
315
+ "grad_norm": 0.04809186980128288,
316
+ "learning_rate": 3.834080717488789e-05,
317
+ "loss": 0.6934,
318
+ "step": 440
319
+ },
320
+ {
321
+ "epoch": 0.9081735620585267,
322
+ "grad_norm": 0.05220267176628113,
323
+ "learning_rate": 3.161434977578475e-05,
324
+ "loss": 0.6942,
325
+ "step": 450
326
+ },
327
+ {
328
+ "epoch": 0.9283551967709385,
329
+ "grad_norm": 0.0148994205519557,
330
+ "learning_rate": 2.4887892376681614e-05,
331
+ "loss": 0.6936,
332
+ "step": 460
333
+ },
334
+ {
335
+ "epoch": 0.9485368314833501,
336
+ "grad_norm": 0.07287449389696121,
337
+ "learning_rate": 1.8161434977578473e-05,
338
+ "loss": 0.6932,
339
+ "step": 470
340
+ },
341
+ {
342
+ "epoch": 0.9687184661957619,
343
+ "grad_norm": 0.21053215861320496,
344
+ "learning_rate": 1.1434977578475336e-05,
345
+ "loss": 0.6926,
346
+ "step": 480
347
+ },
348
+ {
349
+ "epoch": 0.9889001009081736,
350
+ "grad_norm": 0.1443960815668106,
351
+ "learning_rate": 4.708520179372197e-06,
352
+ "loss": 0.694,
353
+ "step": 490
354
+ },
355
+ {
356
+ "epoch": 1.0,
357
+ "eval_accuracy": 0.5052976791120081,
358
+ "eval_loss": 0.693091094493866,
359
+ "eval_runtime": 89.1283,
360
+ "eval_samples_per_second": 44.475,
361
+ "eval_steps_per_second": 5.565,
362
+ "step": 496
363
+ }
364
+ ],
365
+ "logging_steps": 10,
366
+ "max_steps": 496,
367
+ "num_input_tokens_seen": 0,
368
+ "num_train_epochs": 1,
369
+ "save_steps": 500,
370
+ "stateful_callbacks": {
371
+ "TrainerControl": {
372
+ "args": {
373
+ "should_epoch_stop": false,
374
+ "should_evaluate": false,
375
+ "should_log": false,
376
+ "should_save": true,
377
+ "should_training_stop": true
378
+ },
379
+ "attributes": {}
380
+ }
381
+ },
382
+ "total_flos": 1.4392356324192e+17,
383
+ "train_batch_size": 8,
384
+ "trial_name": null,
385
+ "trial_params": null
386
+ }
last-checkpoint/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0317a4be8305d7c63b70fad14cde7804b597568fd2181ea062cb92553a0917d9
3
+ size 5496