nixaut-codelabs committed on
Commit
c816da3
·
verified ·
1 Parent(s): 99c2dd3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -11,7 +11,7 @@ torch.set_num_threads(2)
11
  device = "cpu"
12
  torch_dtype = torch.float32
13
 
14
- model_id = "openai/whisper-tiny"
15
 
16
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
17
  model_id,
@@ -89,7 +89,7 @@ def transcribe_microphone(audio_data, task="transcribe", language="auto", return
89
  "num_beams": 1,
90
  "do_sample": False,
91
  "temperature": 0.0,
92
- "max_new_tokens": 448,
93
  "compression_ratio_threshold": 1.35,
94
  "logprob_threshold": -1.0,
95
  "no_speech_threshold": 0.6,
@@ -156,9 +156,9 @@ languages = [
156
  ("Latin", "la"),
157
  ]
158
 
159
- with gr.Blocks(title="Whisper Tiny - Speech to Text") as demo:
160
- gr.Markdown("# 🎤 Whisper Tiny - Speech to Text")
161
- gr.Markdown("Upload an audio file or record directly to get fast transcription using OpenAI's Whisper Tiny model (39M parameters).")
162
 
163
  with gr.Tab("Upload Audio File"):
164
  with gr.Row():
@@ -241,12 +241,12 @@ with gr.Blocks(title="Whisper Tiny - Speech to Text") as demo:
241
  )
242
 
243
  gr.Markdown("### Features:")
244
- gr.Markdown("- **Lightweight**: Powered by Whisper Tiny model (39M parameters)")
245
  gr.Markdown("- **CPU Optimized**: Optimized for 2-core CPU with 16GB RAM")
246
  gr.Markdown("- **Multi-language**: Supports 99+ languages")
247
  gr.Markdown("- **Translation**: Can translate speech to English")
248
  gr.Markdown("- **Timestamps**: Optional word-level or sentence-level timestamps")
249
- gr.Markdown("- **Fast Processing**: Smallest Whisper model for maximum speed")
250
 
251
  if __name__ == "__main__":
252
  demo.launch(
 
11
  device = "cpu"
12
  torch_dtype = torch.float32
13
 
14
+ model_id = "openai/whisper-base"
15
 
16
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
17
  model_id,
 
89
  "num_beams": 1,
90
  "do_sample": False,
91
  "temperature": 0.0,
92
+ "max_new_tokens": 220,
93
  "compression_ratio_threshold": 1.35,
94
  "logprob_threshold": -1.0,
95
  "no_speech_threshold": 0.6,
 
156
  ("Latin", "la"),
157
  ]
158
 
159
+ with gr.Blocks(title="Whisper Base - Speech to Text") as demo:
160
+ gr.Markdown("# 🎤 Whisper Base - Speech to Text")
161
+ gr.Markdown("Upload an audio file or record directly to get accurate transcription using OpenAI's Whisper Base model (74M parameters).")
162
 
163
  with gr.Tab("Upload Audio File"):
164
  with gr.Row():
 
241
  )
242
 
243
  gr.Markdown("### Features:")
244
+ gr.Markdown("- **Balanced Performance**: Powered by Whisper Base model (74M parameters)")
245
  gr.Markdown("- **CPU Optimized**: Optimized for 2-core CPU with 16GB RAM")
246
  gr.Markdown("- **Multi-language**: Supports 99+ languages")
247
  gr.Markdown("- **Translation**: Can translate speech to English")
248
  gr.Markdown("- **Timestamps**: Optional word-level or sentence-level timestamps")
249
+ gr.Markdown("- **Good Accuracy**: Better accuracy than Tiny with reasonable speed")
250
 
251
  if __name__ == "__main__":
252
  demo.launch(