# NOTE: The lines below ("Spaces: Running") are Hugging Face Space page
# status residue from the scrape, not part of the program.
| # Gradio for Multi ASR | |
| import gradio as gr | |
| import torch | |
| import soundfile as sf | |
| from transformers import pipeline | |
# Device setup: prefer the first CUDA GPU when one is present,
# otherwise fall back to CPU inference.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
# Pipeline cache: language name -> loaded transformers pipeline.
pipelines = {}

# Dispatch table: supported language -> Hugging Face model checkpoint.
_MODELS = {
    "English": "openai/whisper-small",
    "Hindi": "vasista22/whisper-hindi-small",
    "Tamil": "vasista22/whisper-tamil-small",
    "Malayalam": "vrclc/W2V2-BERT-withLM-Malayalam-Studio",
}


def get_pipeline(language):
    """Load and cache model pipelines based on selected language.

    Args:
        language: One of the keys of ``_MODELS`` ("English", "Hindi",
            "Tamil", "Malayalam").

    Returns:
        A transformers automatic-speech-recognition pipeline for the
        language, created on first use and served from the module-level
        cache on every later call.

    Raises:
        ValueError: If *language* is not one of the supported choices.
    """
    if language in pipelines:
        return pipelines[language]
    try:
        model_name = _MODELS[language]
    except KeyError:
        # Same message as before; `from None` hides the internal KeyError.
        raise ValueError("Unsupported language") from None
    print(f"[INFO] Loading model for {language}: {model_name}")
    pipelines[language] = pipeline(
        "automatic-speech-recognition",
        model=model_name,
        device=device,
        # Split long recordings into 10-second chunks for transcription.
        chunk_length_s=10,
    )
    return pipelines[language]
# Transcription code with error debugging
def transcribe(audio, language):
    """Transcribe speech from an audio file based on selected language.

    Args:
        audio: Filepath string supplied by the Gradio Audio component
            (type="filepath"); older Gradio versions pass a dict whose
            "name" key holds the path. None when the user submitted
            without recording/uploading anything.
        language: Language choice from the dropdown, forwarded to
            get_pipeline() for model selection.

    Returns:
        The transcribed text, or a human-readable error message string
        (this is a UI callback, so failures are reported as text rather
        than raised).
    """
    try:
        if audio is None:
            return "Please record or upload an audio file."
        print(f"[DEBUG] Language: {language}")
        print(f"[DEBUG] Received audio: {audio}")
        # Gradio passes a plain path string; legacy payloads are dicts.
        audio_path = audio if isinstance(audio, str) else audio.get("name", None)
        if audio_path is None:
            return "Could not read audio file."
        audio_data, sample_rate = sf.read(audio_path)
        print(f"[DEBUG] Sample rate: {sample_rate}, shape: {audio_data.shape}")
        # Fix: sf.read returns a 2-D (frames, channels) array for stereo
        # input, but the ASR pipeline expects a 1-D waveform — downmix
        # multi-channel audio to mono by averaging the channels.
        if audio_data.ndim > 1:
            audio_data = audio_data.mean(axis=1)
        pipe = get_pipeline(language)
        # Prepare input format for transformers pipeline
        input_data = {"array": audio_data, "sampling_rate": sample_rate}
        result = pipe(input_data)["text"]
        print(f"[DEBUG] Transcription: {result}")
        return result
    except Exception as e:
        # Broad catch is deliberate at this UI boundary: log the traceback
        # and surface the error as text instead of crashing the worker.
        import traceback
        print("[ERROR] Exception during transcription:")
        traceback.print_exc()
        return f"Error: {str(e)}"
# Assemble the Gradio UI: one audio input, one language selector, one
# text output, wired to the transcribe() callback.
audio_in = gr.Audio(
    sources=["microphone", "upload"],
    type="filepath",
    label="Record or Upload Audio",
)
language_in = gr.Dropdown(
    choices=["English", "Hindi", "Tamil", "Malayalam"],
    label="Select Language",
)
text_out = gr.Textbox(label="Transcribed Text")

iface = gr.Interface(
    fn=transcribe,
    inputs=[audio_in, language_in],
    outputs=text_out,
    title="Multilingual Speech Recognition",
    description="Select a language and provide speech input to get transcription.",
)
iface.launch()