leenag's picture
Update app.py
09a9b03 verified
# Gradio app for multilingual automatic speech recognition (ASR)
import gradio as gr
import torch
import soundfile as sf
from transformers import pipeline
# Inference device: the first CUDA GPU when torch reports one, else the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Loaded pipelines keyed by language name; filled lazily by get_pipeline().
pipelines = {}
def get_pipeline(language):
    """Return the speech-recognition pipeline for ``language``.

    The pipeline is built the first time a language is requested and stored
    in the module-level ``pipelines`` dict; later calls reuse that object.

    Args:
        language: One of "English", "Hindi", "Tamil" or "Malayalam".

    Returns:
        The pipeline object created by ``pipeline(...)`` for that language.

    Raises:
        ValueError: If ``language`` is not one of the supported names.
    """
    # Fast path: this language has already been loaded.
    try:
        return pipelines[language]
    except KeyError:
        pass

    # Language name -> model checkpoint identifier.
    checkpoints = {
        "English": "openai/whisper-small",
        "Hindi": "vasista22/whisper-hindi-small",
        "Tamil": "vasista22/whisper-tamil-small",
        "Malayalam": "vrclc/W2V2-BERT-withLM-Malayalam-Studio",
    }
    model_name = checkpoints.get(language)
    if model_name is None:
        raise ValueError("Unsupported language")

    print(f"[INFO] Loading model for {language}: {model_name}")
    asr = pipeline(
        "automatic-speech-recognition",
        model=model_name,
        device=device,
        chunk_length_s=10,
    )
    pipelines[language] = asr
    return asr
# Transcription code with error debugging
def transcribe(audio, language):
    """Transcribe speech from an audio file in the selected language.

    Args:
        audio: Path to the audio file as a string, or a mapping whose
            ``"name"`` entry holds that path. ``None`` means no audio was
            provided.
        language: Language name passed on to ``get_pipeline``.

    Returns:
        The transcribed text. When no audio was supplied, the path could not
        be determined, or any step fails, a human-readable message is
        returned instead (failures are reported as ``"Error: <details>"``
        and the traceback is printed); this function does not raise.
    """
    try:
        if audio is None:
            return "Please record or upload an audio file."

        print(f"[DEBUG] Language: {language}")
        print(f"[DEBUG] Received audio: {audio}")

        audio_path = audio if isinstance(audio, str) else audio.get("name")
        if audio_path is None:
            return "Could not read audio file."

        # float32 halves the memory of soundfile's float64 default.
        audio_data, sample_rate = sf.read(audio_path, dtype="float32")
        print(f"[DEBUG] Sample rate: {sample_rate}, shape: {audio_data.shape}")

        # soundfile yields a (frames, channels) array for multi-channel files,
        # while the ASR pipeline only accepts a 1-D waveform; average the
        # channels so stereo uploads are transcribed instead of rejected.
        if audio_data.ndim > 1:
            audio_data = audio_data.mean(axis=1)

        pipe = get_pipeline(language)

        # Raw-waveform input format for the transformers pipeline; the
        # sampling rate is passed along with the samples.
        input_data = {"array": audio_data, "sampling_rate": sample_rate}
        result = pipe(input_data)["text"]
        print(f"[DEBUG] Transcription: {result}")
        return result
    except Exception as e:
        # UI boundary: log the full traceback and show a short message to the
        # user rather than letting the exception escape.
        import traceback

        print("[ERROR] Exception during transcription:")
        traceback.print_exc()
        return f"Error: {str(e)}"
# Build the web UI: an audio input (microphone or upload, handed to
# `transcribe` as a file path) plus a language selector, and a text output.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or Upload Audio"),
        gr.Dropdown(
            choices=["English", "Hindi", "Tamil", "Malayalam"],
            # Preselect a language so submitting without touching the dropdown
            # does not pass an empty selection to `transcribe`.
            value="English",
            label="Select Language",
        ),
    ],
    outputs=gr.Textbox(label="Transcribed Text"),
    title="Multilingual Speech Recognition",
    description="Select a language and provide speech input to get transcription.",
)

# Start the Gradio server.
iface.launch()