Zarnabh committed on
Commit
f1e4155
·
verified ·
1 Parent(s): a40d180

End of training

Browse files
README.md CHANGED
@@ -6,8 +6,8 @@ license: apache-2.0
6
  base_model: openai/whisper-small
7
  tags:
8
  - generated_from_trainer
9
- datasets:
10
- - ihanif/common_voice_ps_20_0
11
  model-index:
12
  - name: Whisper small Ps - ZFA
13
  results: []
@@ -18,9 +18,10 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  # Whisper small Ps - ZFA
20
 
21
- This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the Common Voice 20.0 dataset.
22
  It achieves the following results on the evaluation set:
23
- - Loss: 0.7437
 
24
 
25
  ## Model description
26
 
@@ -39,27 +40,32 @@ More information needed
39
  ### Training hyperparameters
40
 
41
  The following hyperparameters were used during training:
42
- - learning_rate: 5e-05
43
- - train_batch_size: 4
44
- - eval_batch_size: 4
45
  - seed: 42
46
- - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 
 
47
  - lr_scheduler_type: linear
48
- - num_epochs: 3.0
 
 
49
 
50
  ### Training results
51
 
52
- | Training Loss | Epoch | Step | Validation Loss |
53
- |:-------------:|:------:|:----:|:---------------:|
54
- | 0.2314 | 0.7418 | 500 | 0.8094 |
55
- | 0.1647 | 1.4837 | 1000 | 0.7804 |
56
- | 0.0946 | 2.2255 | 1500 | 0.7939 |
57
- | 0.0309 | 2.9674 | 2000 | 0.7437 |
 
58
 
59
 
60
  ### Framework versions
61
 
62
- - Transformers 4.56.2
63
- - Pytorch 2.7.0+cu126
64
- - Datasets 4.1.1
65
- - Tokenizers 0.22.0
 
6
  base_model: openai/whisper-small
7
  tags:
8
  - generated_from_trainer
9
+ metrics:
10
+ - wer
11
  model-index:
12
  - name: Whisper small Ps - ZFA
13
  results: []
 
18
 
19
  # Whisper small Ps - ZFA
20
 
21
+ This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the Sir Shibli dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.7353
24
+ - Wer: 26.1787
25
 
26
  ## Model description
27
 
 
40
  ### Training hyperparameters
41
 
42
  The following hyperparameters were used during training:
43
+ - learning_rate: 2e-05
44
+ - train_batch_size: 1
45
+ - eval_batch_size: 1
46
  - seed: 42
47
+ - gradient_accumulation_steps: 8
48
+ - total_train_batch_size: 8
49
+ - optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
50
  - lr_scheduler_type: linear
51
+ - lr_scheduler_warmup_steps: 200
52
+ - training_steps: 1500
53
+ - mixed_precision_training: Native AMP
54
 
55
  ### Training results
56
 
57
+ | Training Loss | Epoch | Step | Validation Loss | Wer |
58
+ |:-------------:|:-------:|:----:|:---------------:|:-------:|
59
+ | No log | 2.8341 | 300 | 0.5501 | 32.4650 |
60
+ | 5.4773 | 5.6635 | 600 | 0.5894 | 28.7836 |
61
+ | 5.4773 | 8.4929 | 900 | 0.6551 | 27.0398 |
62
+ | 0.1933 | 11.3223 | 1200 | 0.7140 | 26.7600 |
63
+ | 0.0125 | 14.1517 | 1500 | 0.7353 | 26.1787 |
64
 
65
 
66
  ### Framework versions
67
 
68
+ - Transformers 5.0.0
69
+ - Pytorch 2.9.0+cu126
70
+ - Datasets 4.0.0
71
+ - Tokenizers 0.22.2
config.json CHANGED
@@ -6,7 +6,6 @@
6
  "WhisperForConditionalGeneration"
7
  ],
8
  "attention_dropout": 0.0,
9
- "begin_suppress_tokens": null,
10
  "bos_token_id": 50257,
11
  "classifier_proj_size": 256,
12
  "d_model": 768,
@@ -22,20 +21,8 @@
22
  "encoder_layerdrop": 0.0,
23
  "encoder_layers": 12,
24
  "eos_token_id": 50257,
25
- "forced_decoder_ids": [
26
- [
27
- 1,
28
- 50259
29
- ],
30
- [
31
- 2,
32
- 50359
33
- ],
34
- [
35
- 3,
36
- 50363
37
- ]
38
- ],
39
  "init_std": 0.02,
40
  "is_encoder_decoder": true,
41
  "mask_feature_length": 10,
@@ -44,7 +31,6 @@
44
  "mask_time_length": 10,
45
  "mask_time_min_masks": 2,
46
  "mask_time_prob": 0.05,
47
- "max_length": null,
48
  "max_source_positions": 1500,
49
  "max_target_positions": 448,
50
  "median_filter_width": 7,
@@ -53,8 +39,10 @@
53
  "num_mel_bins": 80,
54
  "pad_token_id": 50257,
55
  "scale_embedding": false,
56
- "transformers_version": "4.56.2",
57
- "use_cache": true,
 
 
58
  "use_weighted_layer_sum": false,
59
  "vocab_size": 51865
60
  }
 
6
  "WhisperForConditionalGeneration"
7
  ],
8
  "attention_dropout": 0.0,
 
9
  "bos_token_id": 50257,
10
  "classifier_proj_size": 256,
11
  "d_model": 768,
 
21
  "encoder_layerdrop": 0.0,
22
  "encoder_layers": 12,
23
  "eos_token_id": 50257,
24
+ "forced_decoder_ids": null,
25
+ "gradient_checkpointing": false,
 
 
 
 
 
 
 
 
 
 
 
 
26
  "init_std": 0.02,
27
  "is_encoder_decoder": true,
28
  "mask_feature_length": 10,
 
31
  "mask_time_length": 10,
32
  "mask_time_min_masks": 2,
33
  "mask_time_prob": 0.05,
 
34
  "max_source_positions": 1500,
35
  "max_target_positions": 448,
36
  "median_filter_width": 7,
 
39
  "num_mel_bins": 80,
40
  "pad_token_id": 50257,
41
  "scale_embedding": false,
42
+ "suppress_tokens": null,
43
+ "tie_word_embeddings": true,
44
+ "transformers_version": "5.0.0",
45
+ "use_cache": false,
46
  "use_weighted_layer_sum": false,
47
  "vocab_size": 51865
48
  }
generation_config.json CHANGED
@@ -41,16 +41,38 @@
41
  5
42
  ]
43
  ],
 
 
44
  "begin_suppress_tokens": [
45
  220,
46
  50257
47
  ],
48
  "bos_token_id": 50257,
49
  "decoder_start_token_id": 50258,
 
 
 
 
 
50
  "eos_token_id": [
51
  50257
52
  ],
53
- "forced_decoder_ids": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  "is_multilingual": true,
55
  "lang_to_id": {
56
  "<|af|>": 50327,
@@ -153,12 +175,23 @@
153
  "<|yo|>": 50325,
154
  "<|zh|>": 50260
155
  },
156
- "language": "pashto",
157
  "max_initial_timestamp_index": 50,
158
  "max_length": 448,
 
 
159
  "no_timestamps_token_id": 50363,
 
 
 
 
 
 
160
  "pad_token_id": 50257,
161
  "prev_sot_token_id": 50361,
 
 
 
162
  "return_timestamps": false,
163
  "suppress_tokens": [
164
  1,
@@ -244,14 +277,21 @@
244
  49870,
245
  50254,
246
  50258,
 
 
247
  50360,
248
  50361,
249
  50362
250
  ],
251
- "task": "transcribe",
252
  "task_to_id": {
253
  "transcribe": 50359,
254
  "translate": 50358
255
  },
256
- "transformers_version": "4.56.2"
 
 
 
 
 
257
  }
 
41
  5
42
  ]
43
  ],
44
+ "assistant_confidence_threshold": 0.4,
45
+ "assistant_lookbehind": 10,
46
  "begin_suppress_tokens": [
47
  220,
48
  50257
49
  ],
50
  "bos_token_id": 50257,
51
  "decoder_start_token_id": 50258,
52
+ "diversity_penalty": 0.0,
53
+ "do_sample": false,
54
+ "early_stopping": false,
55
+ "encoder_no_repeat_ngram_size": 0,
56
+ "encoder_repetition_penalty": 1.0,
57
  "eos_token_id": [
58
  50257
59
  ],
60
+ "epsilon_cutoff": 0.0,
61
+ "eta_cutoff": 0.0,
62
+ "forced_decoder_ids": [
63
+ [
64
+ 1,
65
+ 50340
66
+ ],
67
+ [
68
+ 2,
69
+ 50359
70
+ ],
71
+ [
72
+ 3,
73
+ 50363
74
+ ]
75
+ ],
76
  "is_multilingual": true,
77
  "lang_to_id": {
78
  "<|af|>": 50327,
 
175
  "<|yo|>": 50325,
176
  "<|zh|>": 50260
177
  },
178
+ "length_penalty": 1.0,
179
  "max_initial_timestamp_index": 50,
180
  "max_length": 448,
181
+ "min_length": 0,
182
+ "no_repeat_ngram_size": 0,
183
  "no_timestamps_token_id": 50363,
184
+ "num_assistant_tokens": 20,
185
+ "num_assistant_tokens_schedule": "constant",
186
+ "num_beam_groups": 1,
187
+ "num_beams": 1,
188
+ "num_return_sequences": 1,
189
+ "output_scores": false,
190
  "pad_token_id": 50257,
191
  "prev_sot_token_id": 50361,
192
+ "remove_invalid_values": false,
193
+ "repetition_penalty": 1.0,
194
+ "return_dict_in_generate": false,
195
  "return_timestamps": false,
196
  "suppress_tokens": [
197
  1,
 
277
  49870,
278
  50254,
279
  50258,
280
+ 50358,
281
+ 50359,
282
  50360,
283
  50361,
284
  50362
285
  ],
286
+ "target_lookbehind": 10,
287
  "task_to_id": {
288
  "transcribe": 50359,
289
  "translate": 50358
290
  },
291
+ "temperature": 1.0,
292
+ "top_k": 50,
293
+ "top_p": 1.0,
294
+ "transformers_version": "5.0.0",
295
+ "typical_p": 1.0,
296
+ "use_cache": true
297
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0b8e8eb4ef66a77252bdb11f79e1283058141825c38ed47ebb1730899d20a9d
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ae5fc169fe3f2580bf45eae61f9e26933d21adf4a29d3cdddf435015f054983
3
  size 966995080
processor_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "feature_extractor": {
3
+ "chunk_length": 30,
4
+ "dither": 0.0,
5
+ "feature_extractor_type": "WhisperFeatureExtractor",
6
+ "feature_size": 80,
7
+ "hop_length": 160,
8
+ "n_fft": 400,
9
+ "n_samples": 480000,
10
+ "nb_max_frames": 3000,
11
+ "padding_side": "right",
12
+ "padding_value": 0.0,
13
+ "return_attention_mask": false,
14
+ "sampling_rate": 16000
15
+ },
16
+ "processor_class": "WhisperProcessor"
17
+ }
runs/Feb07_07-42-34_d8ef86bfc6b2/events.out.tfevents.1770450154.d8ef86bfc6b2.1775.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b94535f93a4634db1a4da9a9a6fffdd162c4da05f1553e190a78a1b344cd707b
3
+ size 5057
runs/Feb07_07-56-25_d8ef86bfc6b2/events.out.tfevents.1770450985.d8ef86bfc6b2.1775.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36e9aad18c3e01e6b3f65e9cdbdcaf4b7a65cd9aa5e7ae4ae6802207285ae77a
3
+ size 5057
runs/Feb07_08-18-44_d8ef86bfc6b2/events.out.tfevents.1770452324.d8ef86bfc6b2.1775.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a60f6c382708510c57dcfc6f865001a981e345d3cdfcb13131d27d5a291794e
3
+ size 5906
runs/Feb07_09-06-10_d8ef86bfc6b2/events.out.tfevents.1770455170.d8ef86bfc6b2.27087.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:673e4fa6a08337462b4294b0d95d6c1f27b88ef7d0b1b27a8fef7ceff010e6ba
3
+ size 7636
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0a5392fbd6d7d272e54fe9b56130db6a6cfaaec42b8e231024748589cc2e27b
3
- size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fca71909df634f830182537b67d18d35ea04811ecd33f61c71febff2b1f5886d
3
+ size 5329