# Quran_ASR-API / app.py
# aboalaa147 — "Update app.py"
# commit 7505690 (verified)
import os
import torch
import librosa
import soundfile as sf
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
import gradio as gr
# Hub checkpoint loaded via AutoModelForSpeechSeq2Seq below — presumably a
# Whisper-style encoder-decoder fine-tuned on Quran recitation; confirm on the Hub.
MODEL_ID = "xLeonSTES/quran-to-text-base"
# Target sampling rate the processor/model expect; all input audio is resampled to this.
SAMPLE_RATE = 16000
# Run on GPU when available, otherwise fall back to CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@torch.no_grad()
def load_model():
    """Download the processor and seq2seq ASR model, place the model on
    DEVICE in eval mode, and return both.

    Returns:
        tuple: (processor, model) ready for inference.
    """
    proc = AutoProcessor.from_pretrained(MODEL_ID)
    asr = AutoModelForSpeechSeq2Seq.from_pretrained(MODEL_ID)
    asr = asr.to(DEVICE)
    asr.eval()
    return proc, asr
# Load once at import time so every Gradio callback reuses the same objects.
processor, model = load_model()
def resample_to_16k(path):
    """Read an audio file, downmix to mono, and resample to SAMPLE_RATE.

    Fix: the original read with soundfile's default dtype (float64) and only
    cast to float32 on the resample path, so callers got float64 arrays when
    the file was already 16 kHz and float32 otherwise. Reading as float32 up
    front makes the output dtype consistent on every path.

    Args:
        path: filesystem path to the audio file.
    Returns:
        tuple: (mono float32 waveform, SAMPLE_RATE).
    """
    audio, sr = sf.read(path, dtype="float32")
    if audio.ndim > 1:
        # Multi-channel: average channels down to mono.
        audio = audio.mean(axis=1)
    if sr != SAMPLE_RATE:
        audio = librosa.resample(audio, orig_sr=sr, target_sr=SAMPLE_RATE)
    return audio, SAMPLE_RATE
def transcribe_audio(path):
    """Transcribe the audio file at *path* with the global processor/model.

    Pipeline: load + resample to 16 kHz mono, peak-normalize, extract input
    features, greedy-generate token ids, and decode to text.

    Fix: the original used the Python builtin ``max(abs(audio))``, which
    iterates the NumPy array element-by-element in the interpreter; the
    vectorized ``abs(audio).max()`` does the same reduction in C.

    Args:
        path: filesystem path to the audio file.
    Returns:
        str: the decoded transcription.
    """
    audio, sr = resample_to_16k(path)
    # Peak-normalize to [-1, 1]; the epsilon avoids division by zero on silence.
    audio = audio / (abs(audio).max() + 1e-9)
    inputs = processor(audio, sampling_rate=SAMPLE_RATE, return_tensors="pt")
    input_features = inputs.input_features.to(DEVICE)
    with torch.no_grad():
        generated_ids = model.generate(input_features)
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return text
def run(uploaded_audio, mic_audio):
    """Gradio click handler: transcribe whichever audio input was provided.

    The microphone recording takes priority over the uploaded file. Any
    failure during transcription is reported back to the UI as a string
    rather than raised.
    """
    source = mic_audio if mic_audio else uploaded_audio
    if not source:
        return "No audio provided"
    try:
        return transcribe_audio(source)
    except Exception as exc:
        return f"Error: {exc}"
# --- Gradio UI: two audio inputs (upload / mic), one text output. ----------
with gr.Blocks(title="Quran ASR") as demo:
    gr.Markdown("# Quran ASR — Diacritized Transcription\nUpload or record audio, then press Convert.")
    with gr.Row():
        with gr.Column():
            # type="filepath" makes both components hand run() a temp-file path.
            upload = gr.Audio(type="filepath", label="Upload Audio")
            mic = gr.Audio(type="filepath", label="Microphone Recording")
            btn = gr.Button("Convert")
        with gr.Column():
            out = gr.Textbox(label="Output Text", lines=10)
    # run() prefers the mic recording when both inputs are set.
    btn.click(run, inputs=[upload, mic], outputs=[out])
# Start the Spaces web server (blocks until shutdown).
demo.launch()