mrmuminov
/

whisper-small-uz

@@ -1,17 +1,17 @@
 ---
 language:
 - uz
 license: apache-2.0
 base_model: openai/whisper-small
 tags:
-- hf-asr-leaderboard
 - generated_from_trainer
 datasets:
 - mozilla-foundation/common_voice_16_1
 metrics:
 - wer
 model-index:
-- name: Whisper Small Uz - Bahriddin Mo'minov
   results:
   - task:
       name: Automatic Speech Recognition
@@ -25,18 +25,18 @@ model-index:
     metrics:
     - name: Wer
       type: wer
-      value: 37.07903050585018
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# Whisper Small Uz - Bahriddin Mo'minov
 This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the Common Voice 16.1 dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.3759
-- Wer: 37.0790
 ## Model description
@@ -56,30 +56,52 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 1e-05
-- train_batch_size: 8
-- eval_batch_size: 4
 - seed: 42
-- gradient_accumulation_steps: 2
-- total_train_batch_size: 16
-- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- lr_scheduler_warmup_steps: 500
-- training_steps: 4000
 - mixed_precision_training: Native AMP
 ### Training results
-| Training Loss | Epoch | Step | Validation Loss | Wer     |
-|:-------------:|:-----:|:----:|:---------------:|:-------:|
-| 0.6057        | 0.26  | 1000 | 0.5283          | 46.5667 |
-| 0.436         | 0.53  | 2000 | 0.4354          | 42.1575 |
-| 0.4144        | 0.79  | 3000 | 0.3925          | 38.4788 |
-| 0.3194        | 1.06  | 4000 | 0.3759          | 37.0790 |
 ### Framework versions
-- Transformers 4.37.2
-- Pytorch 2.2.2+cu121
-- Datasets 2.18.0
-- Tokenizers 0.15.2

 ---
+library_name: transformers
 language:
 - uz
 license: apache-2.0
 base_model: openai/whisper-small
 tags:
 - generated_from_trainer
 datasets:
 - mozilla-foundation/common_voice_16_1
 metrics:
 - wer
 model-index:
+- name: Whisper Small UZ - Bahriddin Muminov
   results:
   - task:
       name: Automatic Speech Recognition
     metrics:
     - name: Wer
       type: wer
+      value: 24.46336925439856
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# Whisper Small UZ - Bahriddin Muminov
 This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the Common Voice 16.1 dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.2593
+- Wer: 24.4634
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 1e-05
+- train_batch_size: 16
+- eval_batch_size: 8
 - seed: 42
+- optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
 - lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 1000
+- training_steps: 57000
 - mixed_precision_training: Native AMP
 ### Training results
+| Training Loss | Epoch  | Step  | Validation Loss | Wer     |
+|:-------------:|:------:|:-----:|:---------------:|:-------:|
+| 0.357         | 0.0352 | 2000  | 0.4996          | 42.0801 |
+| 0.2917        | 0.0704 | 4000  | 0.4227          | 36.4010 |
+| 0.2222        | 0.1056 | 6000  | 0.3806          | 36.8330 |
+| 0.2127        | 0.1408 | 8000  | 0.3559          | 31.9044 |
+| 0.2131        | 0.1760 | 10000 | 0.3392          | 32.2440 |
+| 0.2283        | 0.2112 | 12000 | 0.3387          | 30.3111 |
+| 0.2056        | 0.2464 | 14000 | 0.3301          | 29.3033 |
+| 0.1956        | 0.2816 | 16000 | 0.3195          | 30.3610 |
+| 0.1819        | 0.3168 | 18000 | 0.3076          | 30.7056 |
+| 0.1969        | 0.3520 | 20000 | 0.3033          | 29.4395 |
+| 0.156         | 0.3872 | 22000 | 0.3137          | 28.3081 |
+| 0.1521        | 0.4224 | 24000 | 0.2946          | 28.2145 |
+| 0.1736        | 0.4576 | 26000 | 0.2952          | 27.6800 |
+| 0.1647        | 0.4928 | 28000 | 0.2889          | 26.7835 |
+| 0.1596        | 0.5280 | 30000 | 0.2923          | 26.6998 |
+| 0.1586        | 0.5632 | 32000 | 0.2821          | 26.6561 |
+| 0.1299        | 0.5984 | 34000 | 0.2775          | 26.9783 |
+| 0.1564        | 0.6336 | 36000 | 0.2811          | 26.4600 |
+| 0.1525        | 0.6688 | 38000 | 0.2699          | 26.7485 |
+| 0.1469        | 0.7041 | 40000 | 0.2699          | 26.2765 |
+| 0.1362        | 0.7393 | 42000 | 0.2666          | 25.4761 |
+| 0.1268        | 0.7745 | 44000 | 0.2590          | 26.6236 |
+| 0.1389        | 0.8097 | 46000 | 0.2617          | 25.5485 |
+| 0.1277        | 0.8449 | 48000 | 0.2600          | 24.7443 |
+| 0.1312        | 0.8801 | 50000 | 0.2633          | 24.9579 |
+| 0.1431        | 0.9153 | 52000 | 0.2604          | 24.8180 |
+| 0.1366        | 0.9505 | 54000 | 0.2601          | 24.4384 |
+| 0.1363        | 0.9857 | 56000 | 0.2593          | 24.4634 |
 ### Framework versions
+- Transformers 4.49.0
+- Pytorch 2.6.0+cu124
+- Datasets 3.4.1
+- Tokenizers 0.21.1

generation_config.json CHANGED Viewed

@@ -51,11 +51,15 @@
   "forced_decoder_ids": [
     [
       1,
-      null
     ],
     [
       2,
       50359
     ]
   ],
   "is_multilingual": true,
@@ -250,8 +254,6 @@
     49870,
     50254,
     50258,
-    50358,
-    50359,
     50360,
     50361,
     50362
@@ -260,5 +262,5 @@
     "transcribe": 50359,
     "translate": 50358
   },
-  "transformers_version": "4.37.2"
 }

   "forced_decoder_ids": [
     [
       1,
+      50337
     ],
     [
       2,
       50359
+    ],
+    [
+      3,
+      50363
     ]
   ],
   "is_multilingual": true,
     49870,
     50254,
     50258,
     50360,
     50361,
     50362
     "transcribe": 50359,
     "translate": 50358
   },
+  "transformers_version": "4.49.0"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de867767fc6eaf2e6234a6b7ec854b66e0bba8f2f1a6a5202b3cee0100d7058f
 size 966995080

 version https://git-lfs.github.com/spec/v1
+oid sha256:235d9c59d4ecb2540a7b82c3ac4019d466a46586833c91f51d8d4c07556a2ff9
 size 966995080