Spaces:

OvozifyLabs
/

STT-whisper-small

Running

App Files Community

Firdavs222 committed 6 days ago

Commit

c4c4cd7

verified ·

1 Parent(s): 64241ee

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -4

app.py CHANGED Viewed

@@ -122,7 +122,7 @@ def load_audio_file(file_path):
 # --- Transcription Function ---
-def transcribe_audio(audio_file_path):
     """
     Transcribes an audio file using the pre-loaded Whisper model.
     """
@@ -132,6 +132,14 @@ def transcribe_audio(audio_file_path):
     if audio_file_path is None:
         return "Error: No audio file provided."
     try:
         # Load audio using the robust loader and get the 16kHz mono tensor
         audio, sr = load_audio_file(audio_file_path)
@@ -143,7 +151,7 @@ def transcribe_audio(audio_file_path):
         # Move inputs to the appropriate device
         input_features = inputs.input_features.to(device)
-        forced_ids = processor.get_decoder_prompt_ids(language="uz", task="transcribe")
         gen_config = GenerationConfig(
             forced_decoder_ids=forced_ids,
@@ -170,6 +178,12 @@ def transcribe_audio(audio_file_path):
 title = "Whisper Small Uz v1: Multilingual audio transcription"
 description = "A Gradio demo for the **OvozifyLabs/whisper-small-uz-v1** model for Uzbek ASR. Upload an audio file (M4A, MP3, WAV supported) or record directly."
 # 🎤 Input Component
 audio_input = gr.Audio(
     sources=["microphone", "upload"],
@@ -178,12 +192,12 @@ audio_input = gr.Audio(
 )
 # 📝 Output Component
-text_output = gr.Textbox(label="Transcription Result", lines=6, max_lines = 25, show_label=True, show_copy_button=True)
 # 🚀 Create the Interface
 demo = gr.Interface(
     fn=transcribe_audio,
-    inputs=audio_input,
     outputs=text_output,
     title=title,
     description=description,

 # --- Transcription Function ---
+def transcribe_audio(audio_file_path, language):
     """
     Transcribes an audio file using the pre-loaded Whisper model.
     """
     if audio_file_path is None:
         return "Error: No audio file provided."
+    lang_dict = {
+        "Uzbek": "uz",
+        "Russian": "ru",
+        "English": "en"
+    }
+    language = lang_dict[language]
     try:
         # Load audio using the robust loader and get the 16kHz mono tensor
         audio, sr = load_audio_file(audio_file_path)
         # Move inputs to the appropriate device
         input_features = inputs.input_features.to(device)
+        forced_ids = processor.get_decoder_prompt_ids(language=language, task="transcribe")
         gen_config = GenerationConfig(
             forced_decoder_ids=forced_ids,
 title = "Whisper Small Uz v1: Multilingual audio transcription"
 description = "A Gradio demo for the **OvozifyLabs/whisper-small-uz-v1** model for Uzbek ASR. Upload an audio file (M4A, MP3, WAV supported) or record directly."
+language_input = gr.Dropdown(
+    label="Select Language",
+    choices=["Uzbek", "English", "Russian"],
+    value="Uzbek"     # default
+)
 # 🎤 Input Component
 audio_input = gr.Audio(
     sources=["microphone", "upload"],
 )
 # 📝 Output Component
+text_output = gr.Textbox(label="Transcription Result", lines=6, max_lines = 25)
 # 🚀 Create the Interface
 demo = gr.Interface(
     fn=transcribe_audio,
+    inputs=[audio_input, language_input],
     outputs=text_output,
     title=title,
     description=description,