Spaces:
Sleeping
Sleeping
File size: 2,591 Bytes
09a9b03 b1133a6 cb0a694 c48d761 cb0a694 c48d761 5d13a75 c48d761 5d13a75 c48d761 5d13a75 c48d761 09a9b03 c48d761 c838225 c48d761 c838225 c48d761 c838225 c48d761 c838225 c48d761 c838225 c48d761 c838225 cb0a694 c48d761 cb0a694 c48d761 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# Gradio for Multi ASR
import gradio as gr
import torch
import soundfile as sf
from transformers import pipeline
# Device setup: use the first CUDA GPU when available, otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# Pipeline cache: maps a language name (e.g. "English") to its loaded
# transformers ASR pipeline so each model is downloaded/initialized once.
pipelines = {}
def get_pipeline(language):
    """Return the ASR pipeline for *language*, loading and caching it on first use.

    Raises:
        ValueError: if *language* is not one of the supported choices.
    """
    cached = pipelines.get(language)
    if cached is not None:
        return cached

    # Supported UI language choice -> Hugging Face model checkpoint.
    model_by_language = {
        "English": "openai/whisper-small",
        "Hindi": "vasista22/whisper-hindi-small",
        "Tamil": "vasista22/whisper-tamil-small",
        "Malayalam": "vrclc/W2V2-BERT-withLM-Malayalam-Studio",
    }
    if language not in model_by_language:
        raise ValueError("Unsupported language")
    model_name = model_by_language[language]

    print(f"[INFO] Loading model for {language}: {model_name}")
    asr = pipeline(
        "automatic-speech-recognition",
        model=model_name,
        device=device,
        chunk_length_s=10,
    )
    pipelines[language] = asr
    return asr
# Transcription code with error debugging
def transcribe(audio, language):
    """Transcribe speech from an audio file in the selected language.

    Args:
        audio: Filepath string from the Gradio Audio component (or a legacy
            dict with a "name" key from older Gradio versions).
        language: One of the dropdown choices ("English", "Hindi", "Tamil",
            "Malayalam").

    Returns:
        The transcribed text, or a human-readable error message string.
    """
    try:
        if audio is None:
            return "Please record or upload an audio file."
        print(f"[DEBUG] Language: {language}")
        print(f"[DEBUG] Received audio: {audio}")
        audio_path = audio if isinstance(audio, str) else audio.get("name", None)
        if audio_path is None:
            return "Could not read audio file."
        audio_data, sample_rate = sf.read(audio_path)
        print(f"[DEBUG] Sample rate: {sample_rate}, shape: {audio_data.shape}")
        # BUG FIX: soundfile returns a (frames, channels) 2-D array for
        # multi-channel audio, but the ASR pipeline expects a mono 1-D
        # array — downmix stereo recordings by averaging the channels.
        if audio_data.ndim > 1:
            audio_data = audio_data.mean(axis=1)
        pipe = get_pipeline(language)
        # Prepare input format for transformers pipeline
        input_data = {"array": audio_data, "sampling_rate": sample_rate}
        result = pipe(input_data)["text"]
        print(f"[DEBUG] Transcription: {result}")
        return result
    except Exception as e:
        import traceback
        print("[ERROR] Exception during transcription:")
        traceback.print_exc()
        return f"Error: {str(e)}"
# Gradio UI: microphone/file audio input + language selector -> transcribed text.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or Upload Audio"),
        gr.Dropdown(choices=["English", "Hindi", "Tamil", "Malayalam"], label="Select Language"),
    ],
    outputs=gr.Textbox(label="Transcribed Text"),
    title="Multilingual Speech Recognition",
    description="Select a language and provide speech input to get transcription.",
)

if __name__ == "__main__":
    # FIX: guard launch() so importing this module (e.g. for testing or
    # reuse of `transcribe`) does not start a web server as a side effect.
    iface.launch()
|