Wellington Vumu committed on
faster-whisper-medium-sw
Browse files
- README.md +18 -32
- config.json +12 -11
- generation_config.json +13 -27
- model.safetensors +2 -2
- runs/Oct26_19-08-48_6822e7700798/events.out.tfevents.1761505738.6822e7700798.1087.0 +3 -0
- training_args.bin +2 -2
README.md
CHANGED
|
@@ -1,11 +1,9 @@
|
|
| 1 |
---
|
| 2 |
library_name: transformers
|
| 3 |
license: apache-2.0
|
| 4 |
-
base_model: openai/whisper-
|
| 5 |
tags:
|
| 6 |
- generated_from_trainer
|
| 7 |
-
metrics:
|
| 8 |
-
- wer
|
| 9 |
model-index:
|
| 10 |
- name: output
|
| 11 |
results: []
|
|
@@ -16,11 +14,16 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 16 |
|
| 17 |
# output
|
| 18 |
|
| 19 |
-
This model is a fine-tuned version of [openai/whisper-
|
| 20 |
It achieves the following results on the evaluation set:
|
| 21 |
-
-
|
| 22 |
-
-
|
| 23 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
## Model description
|
| 26 |
|
|
@@ -40,37 +43,20 @@ More information needed
|
|
| 40 |
|
| 41 |
The following hyperparameters were used during training:
|
| 42 |
- learning_rate: 5e-06
|
| 43 |
-
- train_batch_size:
|
| 44 |
- eval_batch_size: 8
|
| 45 |
- seed: 42
|
| 46 |
-
- gradient_accumulation_steps:
|
| 47 |
-
- total_train_batch_size:
|
| 48 |
-
- optimizer: Use OptimizerNames.
|
| 49 |
- lr_scheduler_type: cosine
|
| 50 |
-
- lr_scheduler_warmup_steps:
|
| 51 |
-
- training_steps:
|
| 52 |
- mixed_precision_training: Native AMP
|
| 53 |
|
| 54 |
-
### Training results
|
| 55 |
-
|
| 56 |
-
| Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
|
| 57 |
-
|:-------------:|:------:|:----:|:---------------:|:------:|:------:|
|
| 58 |
-
| 0.9794 | 0.7882 | 200 | 1.4261 | 0.7751 | 0.2368 |
|
| 59 |
-
| 0.4482 | 1.5754 | 400 | 0.9341 | 0.5545 | 0.1791 |
|
| 60 |
-
| 0.2978 | 2.3626 | 600 | 0.7387 | 0.4951 | 0.1833 |
|
| 61 |
-
| 0.2087 | 3.1498 | 800 | 0.6379 | 0.6575 | 0.3344 |
|
| 62 |
-
| 0.1782 | 3.9379 | 1000 | 0.5686 | 0.3875 | 0.1540 |
|
| 63 |
-
| 0.1275 | 4.7251 | 1200 | 0.5456 | 0.3472 | 0.1295 |
|
| 64 |
-
| 0.0894 | 5.5123 | 1400 | 0.5294 | 0.4248 | 0.2279 |
|
| 65 |
-
| 0.05 | 6.2995 | 1600 | 0.5384 | 0.3768 | 0.1744 |
|
| 66 |
-
| 0.0471 | 7.0867 | 1800 | 0.5511 | 0.5286 | 0.3003 |
|
| 67 |
-
| 0.0297 | 7.8749 | 2000 | 0.5557 | 0.3863 | 0.1668 |
|
| 68 |
-
| 0.0167 | 8.6621 | 2200 | 0.5610 | 0.4369 | 0.2391 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
### Framework versions
|
| 72 |
|
| 73 |
-
- Transformers 4.
|
| 74 |
- Pytorch 2.8.0+cu126
|
| 75 |
- Datasets 3.6.0
|
| 76 |
-
- Tokenizers 0.
|
|
|
|
| 1 |
---
|
| 2 |
library_name: transformers
|
| 3 |
license: apache-2.0
|
| 4 |
+
base_model: openai/whisper-medium
|
| 5 |
tags:
|
| 6 |
- generated_from_trainer
|
|
|
|
|
|
|
| 7 |
model-index:
|
| 8 |
- name: output
|
| 9 |
results: []
|
|
|
|
| 14 |
|
| 15 |
# output
|
| 16 |
|
| 17 |
+
This model is a fine-tuned version of [openai/whisper-medium](https://huggingface.co/openai/whisper-medium) on an unknown dataset.
|
| 18 |
It achieves the following results on the evaluation set:
|
| 19 |
+
- eval_loss: 0.4645
|
| 20 |
+
- eval_wer: 0.2780
|
| 21 |
+
- eval_cer: 0.0891
|
| 22 |
+
- eval_runtime: 480.529
|
| 23 |
+
- eval_samples_per_second: 1.199
|
| 24 |
+
- eval_steps_per_second: 0.15
|
| 25 |
+
- epoch: 1.3879
|
| 26 |
+
- step: 1600
|
| 27 |
|
| 28 |
## Model description
|
| 29 |
|
|
|
|
| 43 |
|
| 44 |
The following hyperparameters were used during training:
|
| 45 |
- learning_rate: 5e-06
|
| 46 |
+
- train_batch_size: 2
|
| 47 |
- eval_batch_size: 8
|
| 48 |
- seed: 42
|
| 49 |
+
- gradient_accumulation_steps: 2
|
| 50 |
+
- total_train_batch_size: 4
|
| 51 |
+
- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
| 52 |
- lr_scheduler_type: cosine
|
| 53 |
+
- lr_scheduler_warmup_steps: 500
|
| 54 |
+
- training_steps: 5000
|
| 55 |
- mixed_precision_training: Native AMP
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
### Framework versions
|
| 58 |
|
| 59 |
+
- Transformers 4.48.0
|
| 60 |
- Pytorch 2.8.0+cu126
|
| 61 |
- Datasets 3.6.0
|
| 62 |
+
- Tokenizers 0.21.4
|
config.json
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
{
|
|
|
|
| 2 |
"activation_dropout": 0.0,
|
| 3 |
"activation_function": "gelu",
|
| 4 |
"apply_spec_augment": false,
|
|
@@ -9,18 +10,17 @@
|
|
| 9 |
"begin_suppress_tokens": null,
|
| 10 |
"bos_token_id": 50257,
|
| 11 |
"classifier_proj_size": 256,
|
| 12 |
-
"d_model":
|
| 13 |
-
"decoder_attention_heads":
|
| 14 |
-
"decoder_ffn_dim":
|
| 15 |
"decoder_layerdrop": 0.0,
|
| 16 |
-
"decoder_layers":
|
| 17 |
"decoder_start_token_id": 50258,
|
| 18 |
"dropout": 0.0,
|
| 19 |
-
"
|
| 20 |
-
"
|
| 21 |
-
"encoder_ffn_dim": 3072,
|
| 22 |
"encoder_layerdrop": 0.0,
|
| 23 |
-
"encoder_layers":
|
| 24 |
"eos_token_id": 50257,
|
| 25 |
"forced_decoder_ids": [
|
| 26 |
[
|
|
@@ -49,12 +49,13 @@
|
|
| 49 |
"max_target_positions": 448,
|
| 50 |
"median_filter_width": 7,
|
| 51 |
"model_type": "whisper",
|
| 52 |
-
"num_hidden_layers":
|
| 53 |
"num_mel_bins": 80,
|
| 54 |
"pad_token_id": 50257,
|
| 55 |
"scale_embedding": false,
|
| 56 |
-
"
|
| 57 |
-
"
|
|
|
|
| 58 |
"use_weighted_layer_sum": false,
|
| 59 |
"vocab_size": 51865
|
| 60 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "openai/whisper-medium",
|
| 3 |
"activation_dropout": 0.0,
|
| 4 |
"activation_function": "gelu",
|
| 5 |
"apply_spec_augment": false,
|
|
|
|
| 10 |
"begin_suppress_tokens": null,
|
| 11 |
"bos_token_id": 50257,
|
| 12 |
"classifier_proj_size": 256,
|
| 13 |
+
"d_model": 1024,
|
| 14 |
+
"decoder_attention_heads": 16,
|
| 15 |
+
"decoder_ffn_dim": 4096,
|
| 16 |
"decoder_layerdrop": 0.0,
|
| 17 |
+
"decoder_layers": 24,
|
| 18 |
"decoder_start_token_id": 50258,
|
| 19 |
"dropout": 0.0,
|
| 20 |
+
"encoder_attention_heads": 16,
|
| 21 |
+
"encoder_ffn_dim": 4096,
|
|
|
|
| 22 |
"encoder_layerdrop": 0.0,
|
| 23 |
+
"encoder_layers": 24,
|
| 24 |
"eos_token_id": 50257,
|
| 25 |
"forced_decoder_ids": [
|
| 26 |
[
|
|
|
|
| 49 |
"max_target_positions": 448,
|
| 50 |
"median_filter_width": 7,
|
| 51 |
"model_type": "whisper",
|
| 52 |
+
"num_hidden_layers": 24,
|
| 53 |
"num_mel_bins": 80,
|
| 54 |
"pad_token_id": 50257,
|
| 55 |
"scale_embedding": false,
|
| 56 |
+
"torch_dtype": "float32",
|
| 57 |
+
"transformers_version": "4.48.0",
|
| 58 |
+
"use_cache": false,
|
| 59 |
"use_weighted_layer_sum": false,
|
| 60 |
"vocab_size": 51865
|
| 61 |
}
|
generation_config.json
CHANGED
|
@@ -1,44 +1,28 @@
|
|
| 1 |
{
|
| 2 |
"alignment_heads": [
|
| 3 |
[
|
| 4 |
-
|
| 5 |
-
|
| 6 |
],
|
| 7 |
[
|
| 8 |
-
|
| 9 |
-
9
|
| 10 |
-
],
|
| 11 |
-
[
|
| 12 |
-
8,
|
| 13 |
-
0
|
| 14 |
-
],
|
| 15 |
-
[
|
| 16 |
-
8,
|
| 17 |
4
|
| 18 |
],
|
| 19 |
[
|
| 20 |
-
|
| 21 |
-
|
| 22 |
],
|
| 23 |
[
|
| 24 |
-
|
| 25 |
-
|
| 26 |
],
|
| 27 |
[
|
| 28 |
-
|
| 29 |
0
|
| 30 |
],
|
| 31 |
[
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
],
|
| 35 |
-
[
|
| 36 |
-
9,
|
| 37 |
-
9
|
| 38 |
-
],
|
| 39 |
-
[
|
| 40 |
-
10,
|
| 41 |
-
5
|
| 42 |
]
|
| 43 |
],
|
| 44 |
"begin_suppress_tokens": [
|
|
@@ -250,6 +234,8 @@
|
|
| 250 |
49870,
|
| 251 |
50254,
|
| 252 |
50258,
|
|
|
|
|
|
|
| 253 |
50360,
|
| 254 |
50361,
|
| 255 |
50362
|
|
@@ -258,5 +244,5 @@
|
|
| 258 |
"transcribe": 50359,
|
| 259 |
"translate": 50358
|
| 260 |
},
|
| 261 |
-
"transformers_version": "4.
|
| 262 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"alignment_heads": [
|
| 3 |
[
|
| 4 |
+
13,
|
| 5 |
+
15
|
| 6 |
],
|
| 7 |
[
|
| 8 |
+
15,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
4
|
| 10 |
],
|
| 11 |
[
|
| 12 |
+
15,
|
| 13 |
+
15
|
| 14 |
],
|
| 15 |
[
|
| 16 |
+
16,
|
| 17 |
+
1
|
| 18 |
],
|
| 19 |
[
|
| 20 |
+
20,
|
| 21 |
0
|
| 22 |
],
|
| 23 |
[
|
| 24 |
+
23,
|
| 25 |
+
4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
]
|
| 27 |
],
|
| 28 |
"begin_suppress_tokens": [
|
|
|
|
| 234 |
49870,
|
| 235 |
50254,
|
| 236 |
50258,
|
| 237 |
+
50358,
|
| 238 |
+
50359,
|
| 239 |
50360,
|
| 240 |
50361,
|
| 241 |
50362
|
|
|
|
| 244 |
"transcribe": 50359,
|
| 245 |
"translate": 50358
|
| 246 |
},
|
| 247 |
+
"transformers_version": "4.48.0"
|
| 248 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2accc30dbb7087dedb0e6da93ec0df02ea66fe1f7cff43b9c73fcb5a3307b1b0
|
| 3 |
+
size 3055544304
|
runs/Oct26_19-08-48_6822e7700798/events.out.tfevents.1761505738.6822e7700798.1087.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:260ed5c83790e40c044187a76d5cd681aec915519049fd7a8c3ad11a6ed0f9a6
|
| 3 |
+
size 16502
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b42fcdd4228464785684848906472a811a7029daa8bfd596e123baf04d54a07b
|
| 3 |
+
size 5905
|