Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -122,7 +122,7 @@ def load_audio_file(file_path):
|
|
| 122 |
|
| 123 |
# --- Transcription Function ---
|
| 124 |
|
| 125 |
-
def transcribe_audio(audio_file_path):
|
| 126 |
"""
|
| 127 |
Transcribes an audio file using the pre-loaded Whisper model.
|
| 128 |
"""
|
|
@@ -132,6 +132,14 @@ def transcribe_audio(audio_file_path):
|
|
| 132 |
if audio_file_path is None:
|
| 133 |
return "Error: No audio file provided."
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
try:
|
| 136 |
# Load audio using the robust loader and get the 16kHz mono tensor
|
| 137 |
audio, sr = load_audio_file(audio_file_path)
|
|
@@ -143,7 +151,7 @@ def transcribe_audio(audio_file_path):
|
|
| 143 |
# Move inputs to the appropriate device
|
| 144 |
input_features = inputs.input_features.to(device)
|
| 145 |
|
| 146 |
-
forced_ids = processor.get_decoder_prompt_ids(language=
|
| 147 |
|
| 148 |
gen_config = GenerationConfig(
|
| 149 |
forced_decoder_ids=forced_ids,
|
|
@@ -170,6 +178,12 @@ def transcribe_audio(audio_file_path):
|
|
| 170 |
title = "Whisper Small Uz v1: Multilingual audio transcription"
|
| 171 |
description = "A Gradio demo for the **OvozifyLabs/whisper-small-uz-v1** model for Uzbek ASR. Upload an audio file (M4A, MP3, WAV supported) or record directly."
|
| 172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
# 🎤 Input Component
|
| 174 |
audio_input = gr.Audio(
|
| 175 |
sources=["microphone", "upload"],
|
|
@@ -178,12 +192,12 @@ audio_input = gr.Audio(
|
|
| 178 |
)
|
| 179 |
|
| 180 |
# Output Component
|
| 181 |
-
text_output = gr.Textbox(label="Transcription Result", lines=6, max_lines = 25
|
| 182 |
|
| 183 |
# Create the Interface
|
| 184 |
demo = gr.Interface(
|
| 185 |
fn=transcribe_audio,
|
| 186 |
-
inputs=audio_input,
|
| 187 |
outputs=text_output,
|
| 188 |
title=title,
|
| 189 |
description=description,
|
|
|
|
| 122 |
|
| 123 |
# --- Transcription Function ---
|
| 124 |
|
| 125 |
+
def transcribe_audio(audio_file_path, language):
|
| 126 |
"""
|
| 127 |
Transcribes an audio file using the pre-loaded Whisper model.
|
| 128 |
"""
|
|
|
|
| 132 |
if audio_file_path is None:
|
| 133 |
return "Error: No audio file provided."
|
| 134 |
|
| 135 |
+
lang_dict = {
|
| 136 |
+
"Uzbek": "uz",
|
| 137 |
+
"Russian": "ru",
|
| 138 |
+
"English": "en"
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
language = lang_dict[language]
|
| 142 |
+
|
| 143 |
try:
|
| 144 |
# Load audio using the robust loader and get the 16kHz mono tensor
|
| 145 |
audio, sr = load_audio_file(audio_file_path)
|
|
|
|
| 151 |
# Move inputs to the appropriate device
|
| 152 |
input_features = inputs.input_features.to(device)
|
| 153 |
|
| 154 |
+
forced_ids = processor.get_decoder_prompt_ids(language=language, task="transcribe")
|
| 155 |
|
| 156 |
gen_config = GenerationConfig(
|
| 157 |
forced_decoder_ids=forced_ids,
|
|
|
|
| 178 |
title = "Whisper Small Uz v1: Multilingual audio transcription"
|
| 179 |
description = "A Gradio demo for the **OvozifyLabs/whisper-small-uz-v1** model for Uzbek ASR. Upload an audio file (M4A, MP3, WAV supported) or record directly."
|
| 180 |
|
| 181 |
+
language_input = gr.Dropdown(
|
| 182 |
+
label="Select Language",
|
| 183 |
+
choices=["Uzbek", "English", "Russian"],
|
| 184 |
+
value="Uzbek" # default
|
| 185 |
+
)
|
| 186 |
+
|
| 187 |
# 🎤 Input Component
|
| 188 |
audio_input = gr.Audio(
|
| 189 |
sources=["microphone", "upload"],
|
|
|
|
| 192 |
)
|
| 193 |
|
| 194 |
# Output Component
|
| 195 |
+
text_output = gr.Textbox(label="Transcription Result", lines=6, max_lines = 25)
|
| 196 |
|
| 197 |
# Create the Interface
|
| 198 |
demo = gr.Interface(
|
| 199 |
fn=transcribe_audio,
|
| 200 |
+
inputs=[audio_input, language_input],
|
| 201 |
outputs=text_output,
|
| 202 |
title=title,
|
| 203 |
description=description,
|