Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,7 +11,7 @@ torch.set_num_threads(2)
|
|
| 11 |
device = "cpu"
|
| 12 |
torch_dtype = torch.float32
|
| 13 |
|
| 14 |
-
model_id = "openai/whisper-tiny"
|
| 15 |
|
| 16 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
| 17 |
model_id,
|
|
@@ -89,7 +89,7 @@ def transcribe_microphone(audio_data, task="transcribe", language="auto", return
|
|
| 89 |
"num_beams": 1,
|
| 90 |
"do_sample": False,
|
| 91 |
"temperature": 0.0,
|
| 92 |
-
"max_new_tokens":
|
| 93 |
"compression_ratio_threshold": 1.35,
|
| 94 |
"logprob_threshold": -1.0,
|
| 95 |
"no_speech_threshold": 0.6,
|
|
@@ -156,9 +156,9 @@ languages = [
|
|
| 156 |
("Latin", "la"),
|
| 157 |
]
|
| 158 |
|
| 159 |
-
with gr.Blocks(title="Whisper Tiny - Speech to Text") as demo:
|
| 160 |
-
gr.Markdown("# 🎤 Whisper Tiny - Speech to Text")
|
| 161 |
-
gr.Markdown("Upload an audio file or record directly to get fast transcription using OpenAI's Whisper Tiny model (39M parameters).")
|
| 162 |
|
| 163 |
with gr.Tab("Upload Audio File"):
|
| 164 |
with gr.Row():
|
|
@@ -241,12 +241,12 @@ with gr.Blocks(title="Whisper Tiny - Speech to Text") as demo:
|
|
| 241 |
)
|
| 242 |
|
| 243 |
gr.Markdown("### Features:")
|
| 244 |
-
gr.Markdown("- **
|
| 245 |
gr.Markdown("- **CPU Optimized**: Optimized for 2-core CPU with 16GB RAM")
|
| 246 |
gr.Markdown("- **Multi-language**: Supports 99+ languages")
|
| 247 |
gr.Markdown("- **Translation**: Can translate speech to English")
|
| 248 |
gr.Markdown("- **Timestamps**: Optional word-level or sentence-level timestamps")
|
| 249 |
-
gr.Markdown("- **
|
| 250 |
|
| 251 |
if __name__ == "__main__":
|
| 252 |
demo.launch(
|
|
|
|
| 11 |
device = "cpu"
|
| 12 |
torch_dtype = torch.float32
|
| 13 |
|
| 14 |
+
model_id = "openai/whisper-base"
|
| 15 |
|
| 16 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
| 17 |
model_id,
|
|
|
|
| 89 |
"num_beams": 1,
|
| 90 |
"do_sample": False,
|
| 91 |
"temperature": 0.0,
|
| 92 |
+
"max_new_tokens": 220,
|
| 93 |
"compression_ratio_threshold": 1.35,
|
| 94 |
"logprob_threshold": -1.0,
|
| 95 |
"no_speech_threshold": 0.6,
|
|
|
|
| 156 |
("Latin", "la"),
|
| 157 |
]
|
| 158 |
|
| 159 |
+
with gr.Blocks(title="Whisper Base - Speech to Text") as demo:
|
| 160 |
+
gr.Markdown("# 🎤 Whisper Base - Speech to Text")
|
| 161 |
+
gr.Markdown("Upload an audio file or record directly to get accurate transcription using OpenAI's Whisper Base model (74M parameters).")
|
| 162 |
|
| 163 |
with gr.Tab("Upload Audio File"):
|
| 164 |
with gr.Row():
|
|
|
|
| 241 |
)
|
| 242 |
|
| 243 |
gr.Markdown("### Features:")
|
| 244 |
+
gr.Markdown("- **Balanced Performance**: Powered by Whisper Base model (74M parameters)")
|
| 245 |
gr.Markdown("- **CPU Optimized**: Optimized for 2-core CPU with 16GB RAM")
|
| 246 |
gr.Markdown("- **Multi-language**: Supports 99+ languages")
|
| 247 |
gr.Markdown("- **Translation**: Can translate speech to English")
|
| 248 |
gr.Markdown("- **Timestamps**: Optional word-level or sentence-level timestamps")
|
| 249 |
+
gr.Markdown("- **Good Accuracy**: Better accuracy than Tiny with reasonable speed")
|
| 250 |
|
| 251 |
if __name__ == "__main__":
|
| 252 |
demo.launch(
|