LBolitho commited on
Commit
c42140f
·
verified ·
1 Parent(s): bb8e718

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "activation_function": "gelu",
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "WhisperForAudioClassification"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "begin_suppress_tokens": [
10
+ 220,
11
+ 50256
12
+ ],
13
+ "bos_token_id": 50257,
14
+ "classifier_proj_size": 256,
15
+ "d_model": 1280,
16
+ "decoder_attention_heads": 20,
17
+ "decoder_ffn_dim": 5120,
18
+ "decoder_layerdrop": 0.0,
19
+ "decoder_layers": 4,
20
+ "decoder_start_token_id": 50258,
21
+ "dropout": 0.0,
22
+ "dtype": "float32",
23
+ "encoder_attention_heads": 20,
24
+ "encoder_ffn_dim": 5120,
25
+ "encoder_layerdrop": 0.0,
26
+ "encoder_layers": 32,
27
+ "eos_token_id": 50257,
28
+ "init_std": 0.02,
29
+ "is_encoder_decoder": true,
30
+ "mask_feature_length": 10,
31
+ "mask_feature_min_masks": 0,
32
+ "mask_feature_prob": 0.0,
33
+ "mask_time_length": 10,
34
+ "mask_time_min_masks": 2,
35
+ "mask_time_prob": 0.05,
36
+ "max_source_positions": 50,
37
+ "max_target_positions": 448,
38
+ "median_filter_width": 7,
39
+ "model_type": "whisper",
40
+ "num_hidden_layers": 32,
41
+ "num_mel_bins": 128,
42
+ "pad_token_id": 50257,
43
+ "scale_embedding": false,
44
+ "transformers_version": "4.56.1",
45
+ "use_cache": true,
46
+ "use_weighted_layer_sum": false,
47
+ "vocab_size": 51866
48
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80182668b69c07285b58c247dc7247b0b70d3d0b054d62e2f5b8e9332de2f17a
3
+ size 2541820056
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e72b390fa766db8618958a089a79768381a2c347dfa69080e9aa675d025475a7
3
+ size 5083449089
preprocessor_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunk_length": 30,
3
+ "dither": 0.0,
4
+ "feature_extractor_type": "WhisperFeatureExtractor",
5
+ "feature_size": 128,
6
+ "hop_length": 160,
7
+ "n_fft": 400,
8
+ "n_samples": 480000,
9
+ "nb_max_frames": 3000,
10
+ "padding_side": "right",
11
+ "padding_value": 0.0,
12
+ "processor_class": "WhisperProcessor",
13
+ "return_attention_mask": false,
14
+ "sampling_rate": 16000
15
+ }
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a5e082f7d3fac8cb998b4b273337660d6eaa6dfa71e0895b57ae6a9fa154c16
3
+ size 14645
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2812a8fabdb289db1d8c450993f518b5a176efad8bbe135c89aaa82952a3b121
3
+ size 1465
trainer_state.json ADDED
@@ -0,0 +1,925 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1170,
3
+ "best_metric": 0.9991928974979822,
4
+ "best_model_checkpoint": "wav2vec2_frog_classifier_sew_d/checkpoint-1170",
5
+ "epoch": 6.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1170,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.05128205128205128,
14
+ "grad_norm": 10.577055931091309,
15
+ "learning_rate": 1.9230769230769234e-07,
16
+ "loss": 0.6805,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.10256410256410256,
21
+ "grad_norm": 42.849281311035156,
22
+ "learning_rate": 4.05982905982906e-07,
23
+ "loss": 0.6605,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.15384615384615385,
28
+ "grad_norm": 22.91461753845215,
29
+ "learning_rate": 6.196581196581197e-07,
30
+ "loss": 0.6164,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.20512820512820512,
35
+ "grad_norm": 35.23799514770508,
36
+ "learning_rate": 8.333333333333333e-07,
37
+ "loss": 0.5014,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.2564102564102564,
42
+ "grad_norm": 8.47977352142334,
43
+ "learning_rate": 1.047008547008547e-06,
44
+ "loss": 0.3743,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.3076923076923077,
49
+ "grad_norm": 160.9004669189453,
50
+ "learning_rate": 1.2606837606837608e-06,
51
+ "loss": 0.2865,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.358974358974359,
56
+ "grad_norm": 5.323972702026367,
57
+ "learning_rate": 1.4743589743589745e-06,
58
+ "loss": 0.1922,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.41025641025641024,
63
+ "grad_norm": 246.73423767089844,
64
+ "learning_rate": 1.6880341880341883e-06,
65
+ "loss": 0.2413,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.46153846153846156,
70
+ "grad_norm": 5.069357395172119,
71
+ "learning_rate": 1.9017094017094018e-06,
72
+ "loss": 0.1617,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.5128205128205128,
77
+ "grad_norm": 2.5714402198791504,
78
+ "learning_rate": 2.1153846153846155e-06,
79
+ "loss": 0.1292,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.5641025641025641,
84
+ "grad_norm": 284.4996643066406,
85
+ "learning_rate": 2.3290598290598295e-06,
86
+ "loss": 0.0801,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.6153846153846154,
91
+ "grad_norm": 112.67489624023438,
92
+ "learning_rate": 2.542735042735043e-06,
93
+ "loss": 0.2759,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.6666666666666666,
98
+ "grad_norm": 0.44987669587135315,
99
+ "learning_rate": 2.756410256410257e-06,
100
+ "loss": 0.0573,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.717948717948718,
105
+ "grad_norm": 25.903047561645508,
106
+ "learning_rate": 2.9700854700854705e-06,
107
+ "loss": 0.1032,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.7692307692307693,
112
+ "grad_norm": 0.33153071999549866,
113
+ "learning_rate": 3.183760683760684e-06,
114
+ "loss": 0.0777,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 0.8205128205128205,
119
+ "grad_norm": 0.2545352876186371,
120
+ "learning_rate": 3.397435897435898e-06,
121
+ "loss": 0.1243,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 0.8717948717948718,
126
+ "grad_norm": 0.19522325694561005,
127
+ "learning_rate": 3.6111111111111115e-06,
128
+ "loss": 0.0335,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 0.9230769230769231,
133
+ "grad_norm": 0.17608708143234253,
134
+ "learning_rate": 3.8247863247863246e-06,
135
+ "loss": 0.0087,
136
+ "step": 180
137
+ },
138
+ {
139
+ "epoch": 0.9743589743589743,
140
+ "grad_norm": 12.398482322692871,
141
+ "learning_rate": 4.0384615384615385e-06,
142
+ "loss": 0.065,
143
+ "step": 190
144
+ },
145
+ {
146
+ "epoch": 1.0,
147
+ "eval_f1": 0.9951768488745981,
148
+ "eval_fbeta": 0.9923051464968622,
149
+ "eval_loss": 0.024110933765769005,
150
+ "eval_precision": 0.9904,
151
+ "eval_recall": 1.0,
152
+ "eval_runtime": 19.5057,
153
+ "eval_samples_per_second": 68.595,
154
+ "eval_steps_per_second": 8.613,
155
+ "step": 195
156
+ },
157
+ {
158
+ "epoch": 1.0256410256410255,
159
+ "grad_norm": 388.4844055175781,
160
+ "learning_rate": 4.2521367521367524e-06,
161
+ "loss": 0.0407,
162
+ "step": 200
163
+ },
164
+ {
165
+ "epoch": 1.0769230769230769,
166
+ "grad_norm": 6.999018669128418,
167
+ "learning_rate": 4.465811965811966e-06,
168
+ "loss": 0.266,
169
+ "step": 210
170
+ },
171
+ {
172
+ "epoch": 1.1282051282051282,
173
+ "grad_norm": 364.2523498535156,
174
+ "learning_rate": 4.6794871794871795e-06,
175
+ "loss": 0.1352,
176
+ "step": 220
177
+ },
178
+ {
179
+ "epoch": 1.1794871794871795,
180
+ "grad_norm": 0.3629947304725647,
181
+ "learning_rate": 4.8931623931623934e-06,
182
+ "loss": 0.0926,
183
+ "step": 230
184
+ },
185
+ {
186
+ "epoch": 1.2307692307692308,
187
+ "grad_norm": 1.258070945739746,
188
+ "learning_rate": 5.1068376068376065e-06,
189
+ "loss": 0.0389,
190
+ "step": 240
191
+ },
192
+ {
193
+ "epoch": 1.282051282051282,
194
+ "grad_norm": 0.1260978728532791,
195
+ "learning_rate": 5.320512820512821e-06,
196
+ "loss": 0.0372,
197
+ "step": 250
198
+ },
199
+ {
200
+ "epoch": 1.3333333333333333,
201
+ "grad_norm": 0.113288514316082,
202
+ "learning_rate": 5.534188034188035e-06,
203
+ "loss": 0.0708,
204
+ "step": 260
205
+ },
206
+ {
207
+ "epoch": 1.3846153846153846,
208
+ "grad_norm": 0.1554139405488968,
209
+ "learning_rate": 5.7478632478632475e-06,
210
+ "loss": 0.1013,
211
+ "step": 270
212
+ },
213
+ {
214
+ "epoch": 1.435897435897436,
215
+ "grad_norm": 0.14283470809459686,
216
+ "learning_rate": 5.961538461538462e-06,
217
+ "loss": 0.0084,
218
+ "step": 280
219
+ },
220
+ {
221
+ "epoch": 1.4871794871794872,
222
+ "grad_norm": 0.12477540969848633,
223
+ "learning_rate": 6.175213675213676e-06,
224
+ "loss": 0.006,
225
+ "step": 290
226
+ },
227
+ {
228
+ "epoch": 1.5384615384615383,
229
+ "grad_norm": 0.7855970859527588,
230
+ "learning_rate": 6.3888888888888885e-06,
231
+ "loss": 0.1667,
232
+ "step": 300
233
+ },
234
+ {
235
+ "epoch": 1.5897435897435899,
236
+ "grad_norm": 0.18101628124713898,
237
+ "learning_rate": 6.602564102564103e-06,
238
+ "loss": 0.1045,
239
+ "step": 310
240
+ },
241
+ {
242
+ "epoch": 1.641025641025641,
243
+ "grad_norm": 0.11964868754148483,
244
+ "learning_rate": 6.816239316239317e-06,
245
+ "loss": 0.0071,
246
+ "step": 320
247
+ },
248
+ {
249
+ "epoch": 1.6923076923076923,
250
+ "grad_norm": 0.09683432430028915,
251
+ "learning_rate": 7.02991452991453e-06,
252
+ "loss": 0.0051,
253
+ "step": 330
254
+ },
255
+ {
256
+ "epoch": 1.7435897435897436,
257
+ "grad_norm": 0.16468480229377747,
258
+ "learning_rate": 7.243589743589744e-06,
259
+ "loss": 0.0689,
260
+ "step": 340
261
+ },
262
+ {
263
+ "epoch": 1.7948717948717947,
264
+ "grad_norm": 0.11897635459899902,
265
+ "learning_rate": 7.457264957264958e-06,
266
+ "loss": 0.0718,
267
+ "step": 350
268
+ },
269
+ {
270
+ "epoch": 1.8461538461538463,
271
+ "grad_norm": 0.08642622083425522,
272
+ "learning_rate": 7.670940170940172e-06,
273
+ "loss": 0.0042,
274
+ "step": 360
275
+ },
276
+ {
277
+ "epoch": 1.8974358974358974,
278
+ "grad_norm": 0.09093068540096283,
279
+ "learning_rate": 7.884615384615384e-06,
280
+ "loss": 0.0388,
281
+ "step": 370
282
+ },
283
+ {
284
+ "epoch": 1.9487179487179487,
285
+ "grad_norm": 0.2035251259803772,
286
+ "learning_rate": 8.098290598290598e-06,
287
+ "loss": 0.0596,
288
+ "step": 380
289
+ },
290
+ {
291
+ "epoch": 2.0,
292
+ "grad_norm": 0.12319870293140411,
293
+ "learning_rate": 8.311965811965812e-06,
294
+ "loss": 0.0388,
295
+ "step": 390
296
+ },
297
+ {
298
+ "epoch": 2.0,
299
+ "eval_f1": 0.9959514170040485,
300
+ "eval_fbeta": 0.9974050446451359,
301
+ "eval_loss": 0.019416945055127144,
302
+ "eval_precision": 0.9983766233766234,
303
+ "eval_recall": 0.9935379644588045,
304
+ "eval_runtime": 19.5687,
305
+ "eval_samples_per_second": 68.375,
306
+ "eval_steps_per_second": 8.585,
307
+ "step": 390
308
+ },
309
+ {
310
+ "epoch": 2.051282051282051,
311
+ "grad_norm": 0.0893879383802414,
312
+ "learning_rate": 8.525641025641026e-06,
313
+ "loss": 0.0043,
314
+ "step": 400
315
+ },
316
+ {
317
+ "epoch": 2.1025641025641026,
318
+ "grad_norm": 0.07179196923971176,
319
+ "learning_rate": 8.73931623931624e-06,
320
+ "loss": 0.0036,
321
+ "step": 410
322
+ },
323
+ {
324
+ "epoch": 2.1538461538461537,
325
+ "grad_norm": 0.06529832631349564,
326
+ "learning_rate": 8.952991452991454e-06,
327
+ "loss": 0.0032,
328
+ "step": 420
329
+ },
330
+ {
331
+ "epoch": 2.2051282051282053,
332
+ "grad_norm": 0.06070837751030922,
333
+ "learning_rate": 9.166666666666666e-06,
334
+ "loss": 0.0151,
335
+ "step": 430
336
+ },
337
+ {
338
+ "epoch": 2.2564102564102564,
339
+ "grad_norm": 0.15651676058769226,
340
+ "learning_rate": 9.38034188034188e-06,
341
+ "loss": 0.1114,
342
+ "step": 440
343
+ },
344
+ {
345
+ "epoch": 2.3076923076923075,
346
+ "grad_norm": 0.07094116508960724,
347
+ "learning_rate": 9.594017094017094e-06,
348
+ "loss": 0.0045,
349
+ "step": 450
350
+ },
351
+ {
352
+ "epoch": 2.358974358974359,
353
+ "grad_norm": 0.06716040521860123,
354
+ "learning_rate": 9.807692307692308e-06,
355
+ "loss": 0.0033,
356
+ "step": 460
357
+ },
358
+ {
359
+ "epoch": 2.41025641025641,
360
+ "grad_norm": 0.05951394885778427,
361
+ "learning_rate": 9.994658119658121e-06,
362
+ "loss": 0.0029,
363
+ "step": 470
364
+ },
365
+ {
366
+ "epoch": 2.4615384615384617,
367
+ "grad_norm": 0.05390568822622299,
368
+ "learning_rate": 9.941239316239318e-06,
369
+ "loss": 0.0026,
370
+ "step": 480
371
+ },
372
+ {
373
+ "epoch": 2.5128205128205128,
374
+ "grad_norm": 0.05104345828294754,
375
+ "learning_rate": 9.887820512820514e-06,
376
+ "loss": 0.0024,
377
+ "step": 490
378
+ },
379
+ {
380
+ "epoch": 2.564102564102564,
381
+ "grad_norm": 0.04729696735739708,
382
+ "learning_rate": 9.83440170940171e-06,
383
+ "loss": 0.0022,
384
+ "step": 500
385
+ },
386
+ {
387
+ "epoch": 2.6153846153846154,
388
+ "grad_norm": 0.04450414329767227,
389
+ "learning_rate": 9.780982905982906e-06,
390
+ "loss": 0.002,
391
+ "step": 510
392
+ },
393
+ {
394
+ "epoch": 2.6666666666666665,
395
+ "grad_norm": 0.041944343596696854,
396
+ "learning_rate": 9.727564102564104e-06,
397
+ "loss": 0.0019,
398
+ "step": 520
399
+ },
400
+ {
401
+ "epoch": 2.717948717948718,
402
+ "grad_norm": 0.03941260650753975,
403
+ "learning_rate": 9.6741452991453e-06,
404
+ "loss": 0.0018,
405
+ "step": 530
406
+ },
407
+ {
408
+ "epoch": 2.769230769230769,
409
+ "grad_norm": 0.037517789751291275,
410
+ "learning_rate": 9.620726495726497e-06,
411
+ "loss": 0.0017,
412
+ "step": 540
413
+ },
414
+ {
415
+ "epoch": 2.8205128205128203,
416
+ "grad_norm": 0.036115214228630066,
417
+ "learning_rate": 9.567307692307693e-06,
418
+ "loss": 0.0016,
419
+ "step": 550
420
+ },
421
+ {
422
+ "epoch": 2.871794871794872,
423
+ "grad_norm": 0.0382937453687191,
424
+ "learning_rate": 9.51388888888889e-06,
425
+ "loss": 0.0418,
426
+ "step": 560
427
+ },
428
+ {
429
+ "epoch": 2.9230769230769234,
430
+ "grad_norm": 0.03789462521672249,
431
+ "learning_rate": 9.460470085470086e-06,
432
+ "loss": 0.0016,
433
+ "step": 570
434
+ },
435
+ {
436
+ "epoch": 2.9743589743589745,
437
+ "grad_norm": 0.03520243614912033,
438
+ "learning_rate": 9.407051282051283e-06,
439
+ "loss": 0.0016,
440
+ "step": 580
441
+ },
442
+ {
443
+ "epoch": 3.0,
444
+ "eval_f1": 0.9983844911147012,
445
+ "eval_fbeta": 0.9983844111147077,
446
+ "eval_loss": 0.010780692100524902,
447
+ "eval_precision": 0.9983844911147012,
448
+ "eval_recall": 0.9983844911147012,
449
+ "eval_runtime": 19.7043,
450
+ "eval_samples_per_second": 67.904,
451
+ "eval_steps_per_second": 8.526,
452
+ "step": 585
453
+ },
454
+ {
455
+ "epoch": 3.0256410256410255,
456
+ "grad_norm": 0.034466702491045,
457
+ "learning_rate": 9.35363247863248e-06,
458
+ "loss": 0.0015,
459
+ "step": 590
460
+ },
461
+ {
462
+ "epoch": 3.076923076923077,
463
+ "grad_norm": 0.032045699656009674,
464
+ "learning_rate": 9.300213675213676e-06,
465
+ "loss": 0.0014,
466
+ "step": 600
467
+ },
468
+ {
469
+ "epoch": 3.128205128205128,
470
+ "grad_norm": 0.03104749321937561,
471
+ "learning_rate": 9.246794871794873e-06,
472
+ "loss": 0.0013,
473
+ "step": 610
474
+ },
475
+ {
476
+ "epoch": 3.1794871794871793,
477
+ "grad_norm": 0.029300186783075333,
478
+ "learning_rate": 9.193376068376069e-06,
479
+ "loss": 0.0013,
480
+ "step": 620
481
+ },
482
+ {
483
+ "epoch": 3.230769230769231,
484
+ "grad_norm": 0.02834387496113777,
485
+ "learning_rate": 9.139957264957266e-06,
486
+ "loss": 0.0012,
487
+ "step": 630
488
+ },
489
+ {
490
+ "epoch": 3.282051282051282,
491
+ "grad_norm": 0.027324577793478966,
492
+ "learning_rate": 9.086538461538462e-06,
493
+ "loss": 0.0012,
494
+ "step": 640
495
+ },
496
+ {
497
+ "epoch": 3.3333333333333335,
498
+ "grad_norm": 0.026796886697411537,
499
+ "learning_rate": 9.033119658119659e-06,
500
+ "loss": 0.0011,
501
+ "step": 650
502
+ },
503
+ {
504
+ "epoch": 3.3846153846153846,
505
+ "grad_norm": 0.025395015254616737,
506
+ "learning_rate": 8.979700854700855e-06,
507
+ "loss": 0.0011,
508
+ "step": 660
509
+ },
510
+ {
511
+ "epoch": 3.435897435897436,
512
+ "grad_norm": 0.024470018222928047,
513
+ "learning_rate": 8.926282051282053e-06,
514
+ "loss": 0.001,
515
+ "step": 670
516
+ },
517
+ {
518
+ "epoch": 3.4871794871794872,
519
+ "grad_norm": 0.02359500713646412,
520
+ "learning_rate": 8.872863247863248e-06,
521
+ "loss": 0.001,
522
+ "step": 680
523
+ },
524
+ {
525
+ "epoch": 3.5384615384615383,
526
+ "grad_norm": 0.02294657565653324,
527
+ "learning_rate": 8.819444444444445e-06,
528
+ "loss": 0.0009,
529
+ "step": 690
530
+ },
531
+ {
532
+ "epoch": 3.58974358974359,
533
+ "grad_norm": 0.02199380099773407,
534
+ "learning_rate": 8.766025641025641e-06,
535
+ "loss": 0.0009,
536
+ "step": 700
537
+ },
538
+ {
539
+ "epoch": 3.641025641025641,
540
+ "grad_norm": 0.0217047818005085,
541
+ "learning_rate": 8.712606837606838e-06,
542
+ "loss": 0.0009,
543
+ "step": 710
544
+ },
545
+ {
546
+ "epoch": 3.6923076923076925,
547
+ "grad_norm": 0.020800307393074036,
548
+ "learning_rate": 8.659188034188036e-06,
549
+ "loss": 0.0008,
550
+ "step": 720
551
+ },
552
+ {
553
+ "epoch": 3.7435897435897436,
554
+ "grad_norm": 0.02008337713778019,
555
+ "learning_rate": 8.605769230769232e-06,
556
+ "loss": 0.0008,
557
+ "step": 730
558
+ },
559
+ {
560
+ "epoch": 3.7948717948717947,
561
+ "grad_norm": 0.02081192284822464,
562
+ "learning_rate": 8.552350427350427e-06,
563
+ "loss": 0.0008,
564
+ "step": 740
565
+ },
566
+ {
567
+ "epoch": 3.8461538461538463,
568
+ "grad_norm": 0.01924249343574047,
569
+ "learning_rate": 8.498931623931624e-06,
570
+ "loss": 0.0008,
571
+ "step": 750
572
+ },
573
+ {
574
+ "epoch": 3.8974358974358974,
575
+ "grad_norm": 0.018428660929203033,
576
+ "learning_rate": 8.445512820512822e-06,
577
+ "loss": 0.0007,
578
+ "step": 760
579
+ },
580
+ {
581
+ "epoch": 3.948717948717949,
582
+ "grad_norm": 0.01805875450372696,
583
+ "learning_rate": 8.392094017094018e-06,
584
+ "loss": 0.0007,
585
+ "step": 770
586
+ },
587
+ {
588
+ "epoch": 4.0,
589
+ "grad_norm": 0.02028258703649044,
590
+ "learning_rate": 8.338675213675215e-06,
591
+ "loss": 0.1822,
592
+ "step": 780
593
+ },
594
+ {
595
+ "epoch": 4.0,
596
+ "eval_f1": 0.9926769731489016,
597
+ "eval_fbeta": 0.9970577813430838,
598
+ "eval_loss": 0.04822924733161926,
599
+ "eval_precision": 1.0,
600
+ "eval_recall": 0.9854604200323102,
601
+ "eval_runtime": 19.6411,
602
+ "eval_samples_per_second": 68.122,
603
+ "eval_steps_per_second": 8.553,
604
+ "step": 780
605
+ },
606
+ {
607
+ "epoch": 4.051282051282051,
608
+ "grad_norm": 0.022473065182566643,
609
+ "learning_rate": 8.285256410256411e-06,
610
+ "loss": 0.0009,
611
+ "step": 790
612
+ },
613
+ {
614
+ "epoch": 4.102564102564102,
615
+ "grad_norm": 0.020844997838139534,
616
+ "learning_rate": 8.231837606837608e-06,
617
+ "loss": 0.0008,
618
+ "step": 800
619
+ },
620
+ {
621
+ "epoch": 4.153846153846154,
622
+ "grad_norm": 0.03373854234814644,
623
+ "learning_rate": 8.178418803418804e-06,
624
+ "loss": 0.0443,
625
+ "step": 810
626
+ },
627
+ {
628
+ "epoch": 4.205128205128205,
629
+ "grad_norm": 11.480348587036133,
630
+ "learning_rate": 8.125000000000001e-06,
631
+ "loss": 0.1932,
632
+ "step": 820
633
+ },
634
+ {
635
+ "epoch": 4.256410256410256,
636
+ "grad_norm": 0.03889571130275726,
637
+ "learning_rate": 8.071581196581197e-06,
638
+ "loss": 0.2656,
639
+ "step": 830
640
+ },
641
+ {
642
+ "epoch": 4.3076923076923075,
643
+ "grad_norm": 0.08549106866121292,
644
+ "learning_rate": 8.018162393162394e-06,
645
+ "loss": 0.078,
646
+ "step": 840
647
+ },
648
+ {
649
+ "epoch": 4.358974358974359,
650
+ "grad_norm": 0.028424395248293877,
651
+ "learning_rate": 7.96474358974359e-06,
652
+ "loss": 0.043,
653
+ "step": 850
654
+ },
655
+ {
656
+ "epoch": 4.410256410256411,
657
+ "grad_norm": 124.80490112304688,
658
+ "learning_rate": 7.911324786324787e-06,
659
+ "loss": 0.0298,
660
+ "step": 860
661
+ },
662
+ {
663
+ "epoch": 4.461538461538462,
664
+ "grad_norm": 0.049465615302324295,
665
+ "learning_rate": 7.857905982905984e-06,
666
+ "loss": 0.0881,
667
+ "step": 870
668
+ },
669
+ {
670
+ "epoch": 4.512820512820513,
671
+ "grad_norm": 0.04318946227431297,
672
+ "learning_rate": 7.80448717948718e-06,
673
+ "loss": 0.0526,
674
+ "step": 880
675
+ },
676
+ {
677
+ "epoch": 4.564102564102564,
678
+ "grad_norm": 0.02768160216510296,
679
+ "learning_rate": 7.751068376068377e-06,
680
+ "loss": 0.0012,
681
+ "step": 890
682
+ },
683
+ {
684
+ "epoch": 4.615384615384615,
685
+ "grad_norm": 0.02875378355383873,
686
+ "learning_rate": 7.697649572649573e-06,
687
+ "loss": 0.0857,
688
+ "step": 900
689
+ },
690
+ {
691
+ "epoch": 4.666666666666667,
692
+ "grad_norm": 0.08129971474409103,
693
+ "learning_rate": 7.64423076923077e-06,
694
+ "loss": 0.0023,
695
+ "step": 910
696
+ },
697
+ {
698
+ "epoch": 4.717948717948718,
699
+ "grad_norm": 0.0326925627887249,
700
+ "learning_rate": 7.590811965811966e-06,
701
+ "loss": 0.0595,
702
+ "step": 920
703
+ },
704
+ {
705
+ "epoch": 4.769230769230769,
706
+ "grad_norm": 0.027897467836737633,
707
+ "learning_rate": 7.537393162393163e-06,
708
+ "loss": 0.0415,
709
+ "step": 930
710
+ },
711
+ {
712
+ "epoch": 4.82051282051282,
713
+ "grad_norm": 0.0258422140032053,
714
+ "learning_rate": 7.48397435897436e-06,
715
+ "loss": 0.0018,
716
+ "step": 940
717
+ },
718
+ {
719
+ "epoch": 4.871794871794872,
720
+ "grad_norm": 0.0415211021900177,
721
+ "learning_rate": 7.4305555555555565e-06,
722
+ "loss": 0.0832,
723
+ "step": 950
724
+ },
725
+ {
726
+ "epoch": 4.923076923076923,
727
+ "grad_norm": 0.024675490334630013,
728
+ "learning_rate": 7.377136752136753e-06,
729
+ "loss": 0.001,
730
+ "step": 960
731
+ },
732
+ {
733
+ "epoch": 4.9743589743589745,
734
+ "grad_norm": 0.024120161309838295,
735
+ "learning_rate": 7.323717948717949e-06,
736
+ "loss": 0.001,
737
+ "step": 970
738
+ },
739
+ {
740
+ "epoch": 5.0,
741
+ "eval_f1": 0.99185667752443,
742
+ "eval_fbeta": 0.9967265967955792,
743
+ "eval_loss": 0.05167613551020622,
744
+ "eval_precision": 1.0,
745
+ "eval_recall": 0.9838449111470113,
746
+ "eval_runtime": 19.7601,
747
+ "eval_samples_per_second": 67.712,
748
+ "eval_steps_per_second": 8.502,
749
+ "step": 975
750
+ },
751
+ {
752
+ "epoch": 5.0256410256410255,
753
+ "grad_norm": 0.04074859991669655,
754
+ "learning_rate": 7.270299145299145e-06,
755
+ "loss": 0.0441,
756
+ "step": 980
757
+ },
758
+ {
759
+ "epoch": 5.076923076923077,
760
+ "grad_norm": 0.024757781997323036,
761
+ "learning_rate": 7.2168803418803426e-06,
762
+ "loss": 0.0009,
763
+ "step": 990
764
+ },
765
+ {
766
+ "epoch": 5.128205128205128,
767
+ "grad_norm": 0.02358800172805786,
768
+ "learning_rate": 7.163461538461539e-06,
769
+ "loss": 0.0388,
770
+ "step": 1000
771
+ },
772
+ {
773
+ "epoch": 5.17948717948718,
774
+ "grad_norm": 0.023568585515022278,
775
+ "learning_rate": 7.110042735042736e-06,
776
+ "loss": 0.001,
777
+ "step": 1010
778
+ },
779
+ {
780
+ "epoch": 5.230769230769231,
781
+ "grad_norm": 0.020907074213027954,
782
+ "learning_rate": 7.056623931623933e-06,
783
+ "loss": 0.0009,
784
+ "step": 1020
785
+ },
786
+ {
787
+ "epoch": 5.282051282051282,
788
+ "grad_norm": 0.01977492682635784,
789
+ "learning_rate": 7.003205128205129e-06,
790
+ "loss": 0.0008,
791
+ "step": 1030
792
+ },
793
+ {
794
+ "epoch": 5.333333333333333,
795
+ "grad_norm": 0.019288958981633186,
796
+ "learning_rate": 6.949786324786325e-06,
797
+ "loss": 0.0008,
798
+ "step": 1040
799
+ },
800
+ {
801
+ "epoch": 5.384615384615385,
802
+ "grad_norm": 0.01873486489057541,
803
+ "learning_rate": 6.896367521367522e-06,
804
+ "loss": 0.0008,
805
+ "step": 1050
806
+ },
807
+ {
808
+ "epoch": 5.435897435897436,
809
+ "grad_norm": 0.018183663487434387,
810
+ "learning_rate": 6.842948717948719e-06,
811
+ "loss": 0.0007,
812
+ "step": 1060
813
+ },
814
+ {
815
+ "epoch": 5.487179487179487,
816
+ "grad_norm": 0.01823146641254425,
817
+ "learning_rate": 6.7895299145299155e-06,
818
+ "loss": 0.0007,
819
+ "step": 1070
820
+ },
821
+ {
822
+ "epoch": 5.538461538461538,
823
+ "grad_norm": 0.017620518803596497,
824
+ "learning_rate": 6.736111111111112e-06,
825
+ "loss": 0.0007,
826
+ "step": 1080
827
+ },
828
+ {
829
+ "epoch": 5.589743589743589,
830
+ "grad_norm": 0.020309826359152794,
831
+ "learning_rate": 6.682692307692308e-06,
832
+ "loss": 0.0455,
833
+ "step": 1090
834
+ },
835
+ {
836
+ "epoch": 5.641025641025641,
837
+ "grad_norm": 0.022107329219579697,
838
+ "learning_rate": 6.629273504273504e-06,
839
+ "loss": 0.0009,
840
+ "step": 1100
841
+ },
842
+ {
843
+ "epoch": 5.6923076923076925,
844
+ "grad_norm": 0.020427662879228592,
845
+ "learning_rate": 6.5758547008547016e-06,
846
+ "loss": 0.0008,
847
+ "step": 1110
848
+ },
849
+ {
850
+ "epoch": 5.743589743589744,
851
+ "grad_norm": 0.019666729494929314,
852
+ "learning_rate": 6.522435897435898e-06,
853
+ "loss": 0.0008,
854
+ "step": 1120
855
+ },
856
+ {
857
+ "epoch": 5.794871794871795,
858
+ "grad_norm": 0.01790018193423748,
859
+ "learning_rate": 6.469017094017095e-06,
860
+ "loss": 0.0007,
861
+ "step": 1130
862
+ },
863
+ {
864
+ "epoch": 5.846153846153846,
865
+ "grad_norm": 0.017409320920705795,
866
+ "learning_rate": 6.415598290598292e-06,
867
+ "loss": 0.0007,
868
+ "step": 1140
869
+ },
870
+ {
871
+ "epoch": 5.897435897435898,
872
+ "grad_norm": 0.016504665836691856,
873
+ "learning_rate": 6.362179487179488e-06,
874
+ "loss": 0.0007,
875
+ "step": 1150
876
+ },
877
+ {
878
+ "epoch": 5.948717948717949,
879
+ "grad_norm": 0.01678137667477131,
880
+ "learning_rate": 6.308760683760684e-06,
881
+ "loss": 0.0006,
882
+ "step": 1160
883
+ },
884
+ {
885
+ "epoch": 6.0,
886
+ "grad_norm": 0.015802927315235138,
887
+ "learning_rate": 6.255341880341881e-06,
888
+ "loss": 0.0006,
889
+ "step": 1170
890
+ },
891
+ {
892
+ "epoch": 6.0,
893
+ "eval_f1": 0.9991928974979822,
894
+ "eval_fbeta": 0.9987091811294363,
895
+ "eval_loss": 0.005148586817085743,
896
+ "eval_precision": 0.9983870967741936,
897
+ "eval_recall": 1.0,
898
+ "eval_runtime": 19.7873,
899
+ "eval_samples_per_second": 67.619,
900
+ "eval_steps_per_second": 8.49,
901
+ "step": 1170
902
+ }
903
+ ],
904
+ "logging_steps": 10,
905
+ "max_steps": 2340,
906
+ "num_input_tokens_seen": 0,
907
+ "num_train_epochs": 12,
908
+ "save_steps": 500,
909
+ "stateful_callbacks": {
910
+ "TrainerControl": {
911
+ "args": {
912
+ "should_epoch_stop": false,
913
+ "should_evaluate": false,
914
+ "should_log": false,
915
+ "should_save": true,
916
+ "should_training_stop": false
917
+ },
918
+ "attributes": {}
919
+ }
920
+ },
921
+ "total_flos": 9.13186778936832e+17,
922
+ "train_batch_size": 16,
923
+ "trial_name": null,
924
+ "trial_params": null
925
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66fb52c99af4f3fabdba40955b0f2597cf91994a897b6b89c60340e35d35072f
3
+ size 5777