Commit
·
1571261
1
Parent(s):
5d4c1da
Add model size options for whisper
Browse files
- app.py +6 -1
- backend/audio_to_tgt.py +2 -2
app.py
CHANGED
|
@@ -45,7 +45,12 @@ description_audio = "Upload an audio file to extract text and translate it to En
|
|
| 45 |
|
| 46 |
audio_interface = gr.Interface(
|
| 47 |
fn=src_audio_to_eng_translator,
|
| 48 |
-
inputs=gr.Audio(label="Upload an Audio file", type="filepath"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
outputs=gr.Textbox(label="Translated Text in English"),
|
| 50 |
title=heading_audio,
|
| 51 |
description=description_audio
|
|
|
|
| 45 |
|
| 46 |
audio_interface = gr.Interface(
|
| 47 |
fn=src_audio_to_eng_translator,
|
| 48 |
+
inputs=[gr.Audio(label="Upload an Audio file", type="filepath"),
|
| 49 |
+
gr.Dropdown(
|
| 50 |
+
choices=["turbo", "base", "tiny", "small", "medium", "large"],
|
| 51 |
+
label="Select Whisper Model size",
|
| 52 |
+
)
|
| 53 |
+
],
|
| 54 |
outputs=gr.Textbox(label="Translated Text in English"),
|
| 55 |
title=heading_audio,
|
| 56 |
description=description_audio
|
backend/audio_to_tgt.py
CHANGED
|
@@ -10,10 +10,10 @@ def audio_to_numpy(audio_file_input):
|
|
| 10 |
|
| 11 |
return samples / np.iinfo(audio.array_type).max
|
| 12 |
|
| 13 |
-
def src_audio_to_eng_translator(audio_file_input):
|
| 14 |
audio_data = audio_to_numpy(audio_file_input)
|
| 15 |
|
| 16 |
-
model = whisper.load_model(
|
| 17 |
result = model.transcribe(audio_data)
|
| 18 |
|
| 19 |
translated_text = GoogleTranslator(source='auto', target='en').translate(result["text"])
|
|
|
|
| 10 |
|
| 11 |
return samples / np.iinfo(audio.array_type).max
|
| 12 |
|
| 13 |
+
def src_audio_to_eng_translator(audio_file_input, model_size = "turbo"):
|
| 14 |
audio_data = audio_to_numpy(audio_file_input)
|
| 15 |
|
| 16 |
+
model = whisper.load_model(model_size)
|
| 17 |
result = model.transcribe(audio_data)
|
| 18 |
|
| 19 |
translated_text = GoogleTranslator(source='auto', target='en').translate(result["text"])
|