Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,7 +11,7 @@ torch.set_num_threads(2)
|
|
| 11 |
device = "cpu"
|
| 12 |
torch_dtype = torch.float32
|
| 13 |
|
| 14 |
-
model_id = "openai/whisper-tiny"
|
| 15 |
|
| 16 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
| 17 |
model_id,
|
|
@@ -89,7 +89,7 @@ def transcribe_microphone(audio_data, task="transcribe", language="auto", return
|
|
| 89 |
"num_beams": 1,
|
| 90 |
"do_sample": False,
|
| 91 |
"temperature": 0.0,
|
| 92 |
-
"max_new_tokens":
|
| 93 |
"compression_ratio_threshold": 1.35,
|
| 94 |
"logprob_threshold": -1.0,
|
| 95 |
"no_speech_threshold": 0.6,
|
|
@@ -156,9 +156,9 @@ languages = [
|
|
| 156 |
("Latin", "la"),
|
| 157 |
]
|
| 158 |
|
| 159 |
-
with gr.Blocks(title="Whisper Tiny - Speech to Text") as demo:
|
| 160 |
-
gr.Markdown("# 🎤 Whisper Tiny - Speech to Text")
|
| 161 |
-
gr.Markdown("Upload an audio file or record directly to get fast transcription using OpenAI's Whisper Tiny model (39M parameters).")
|
| 162 |
|
| 163 |
with gr.Tab("Upload Audio File"):
|
| 164 |
with gr.Row():
|
|
@@ -241,12 +241,12 @@ with gr.Blocks(title="Whisper Tiny - Speech to Text") as demo:
|
|
| 241 |
)
|
| 242 |
|
| 243 |
gr.Markdown("### Features:")
|
| 244 |
-
gr.Markdown("- **
|
| 245 |
gr.Markdown("- **CPU Optimized**: Optimized for 2-core CPU with 16GB RAM")
|
| 246 |
gr.Markdown("- **Multi-language**: Supports 99+ languages")
|
| 247 |
gr.Markdown("- **Translation**: Can translate speech to English")
|
| 248 |
gr.Markdown("- **Timestamps**: Optional word-level or sentence-level timestamps")
|
| 249 |
-
gr.Markdown("- **
|
| 250 |
|
| 251 |
if __name__ == "__main__":
|
| 252 |
demo.launch(
|
|
|
|
| 11 |
device = "cpu"
|
| 12 |
torch_dtype = torch.float32
|
| 13 |
|
| 14 |
+
model_id = "openai/whisper-base"
|
| 15 |
|
| 16 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
| 17 |
model_id,
|
|
|
|
| 89 |
"num_beams": 1,
|
| 90 |
"do_sample": False,
|
| 91 |
"temperature": 0.0,
|
| 92 |
+
"max_new_tokens": 220,
|
| 93 |
"compression_ratio_threshold": 1.35,
|
| 94 |
"logprob_threshold": -1.0,
|
| 95 |
"no_speech_threshold": 0.6,
|
|
|
|
| 156 |
("Latin", "la"),
|
| 157 |
]
|
| 158 |
|
| 159 |
+
with gr.Blocks(title="Whisper Base - Speech to Text") as demo:
|
| 160 |
+
gr.Markdown("# 🎤 Whisper Base - Speech to Text")
|
| 161 |
+
gr.Markdown("Upload an audio file or record directly to get accurate transcription using OpenAI's Whisper Base model (74M parameters).")
|
| 162 |
|
| 163 |
with gr.Tab("Upload Audio File"):
|
| 164 |
with gr.Row():
|
|
|
|
| 241 |
)
|
| 242 |
|
| 243 |
gr.Markdown("### Features:")
|
| 244 |
+
gr.Markdown("- **Balanced Performance**: Powered by Whisper Base model (74M parameters)")
|
| 245 |
gr.Markdown("- **CPU Optimized**: Optimized for 2-core CPU with 16GB RAM")
|
| 246 |
gr.Markdown("- **Multi-language**: Supports 99+ languages")
|
| 247 |
gr.Markdown("- **Translation**: Can translate speech to English")
|
| 248 |
gr.Markdown("- **Timestamps**: Optional word-level or sentence-level timestamps")
|
| 249 |
+
gr.Markdown("- **Good Accuracy**: Better accuracy than Tiny with reasonable speed")
|
| 250 |
|
| 251 |
if __name__ == "__main__":
|
| 252 |
demo.launch(
|