Spaces:
Sleeping
Sleeping
File size: 1,974 Bytes
7505690 58ed92a b2cde20 7505690 58ed92a 7505690 58ed92a 7505690 58ed92a 7505690 b2cde20 7505690 30f4a9a 7505690 30f4a9a 7505690 30f4a9a 7505690 30f4a9a 7505690 58ed92a 7505690 58ed92a 7505690 58ed92a 7505690 58ed92a 7505690 58ed92a 7505690 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import os
import torch
import librosa
import soundfile as sf
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
import gradio as gr
# Hugging Face Hub identifier of the checkpoint loaded by load_model().
MODEL_ID = "xLeonSTES/quran-to-text-base"
# Sampling rate (Hz) that audio is resampled to and that is passed to the processor.
SAMPLE_RATE = 16000
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@torch.no_grad()
def load_model():
    """Load the processor and the speech seq2seq model named by MODEL_ID.

    The model is moved to DEVICE and switched to evaluation mode before it
    is handed back.

    Returns:
        A ``(processor, model)`` tuple.
    """
    feature_processor = AutoProcessor.from_pretrained(MODEL_ID)
    asr_model = AutoModelForSpeechSeq2Seq.from_pretrained(MODEL_ID)
    # Place the weights on the selected device, then put the module in
    # evaluation mode for inference.
    asr_model.to(DEVICE)
    asr_model.eval()
    return feature_processor, asr_model
# Load once at import time; transcribe_audio() reads these module-level globals.
processor, model = load_model()
def resample_to_16k(path):
    """Read an audio file and return it as a 1-D signal at SAMPLE_RATE.

    Args:
        path: Location of the audio file, passed straight to ``sf.read``.

    Returns:
        A ``(samples, SAMPLE_RATE)`` tuple. The samples are resampled (and
        cast to float32) only when the file's native rate differs from
        SAMPLE_RATE; otherwise they are returned as read.
    """
    samples, native_rate = sf.read(path)

    # Data with more than one dimension is collapsed by averaging over
    # axis 1 (the channel axis for soundfile's default layout).
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Already at the target rate: nothing more to do.
    if native_rate == SAMPLE_RATE:
        return samples, SAMPLE_RATE

    resampled = librosa.resample(
        samples.astype('float32'),
        orig_sr=native_rate,
        target_sr=SAMPLE_RATE,
    )
    return resampled, SAMPLE_RATE
def transcribe_audio(path):
    """Transcribe the audio file at ``path`` with the module-level model.

    The audio is loaded through ``resample_to_16k``, peak-normalized,
    converted to model features by ``processor`` and decoded with
    ``model.generate``.

    Args:
        path: Location of the audio file to transcribe.

    Returns:
        The decoded text of the first (only) sequence, with special tokens
        removed.

    Raises:
        ValueError: If the file contains no audio samples.
    """
    audio, _ = resample_to_16k(path)

    # Fail with a clear message instead of an opaque reduction error.
    if audio.size == 0:
        raise ValueError("Audio contains no samples")

    # Vectorized peak lookup: the builtin max() would walk the array one
    # element at a time in Python, which is very slow on long recordings.
    peak = abs(audio).max()
    # The epsilon keeps the division defined for all-silent input.
    audio = audio / (peak + 1e-9)

    inputs = processor(audio, sampling_rate=SAMPLE_RATE, return_tensors="pt")
    input_features = inputs.input_features.to(DEVICE)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        generated_ids = model.generate(input_features)

    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
def run(uploaded_audio, mic_audio):
    """Handle the Convert button: transcribe whichever audio was supplied.

    The microphone recording takes precedence over the uploaded file when
    both are present.

    Args:
        uploaded_audio: File path of the uploaded audio, or a falsy value.
        mic_audio: File path of the microphone recording, or a falsy value.

    Returns:
        The transcription text, ``"No audio provided"`` when neither input
        is set, or an ``"Error: ..."`` message if transcription fails.
    """
    source = mic_audio if mic_audio else uploaded_audio
    if source:
        # UI boundary: report any failure as text instead of raising.
        try:
            result = transcribe_audio(source)
        except Exception as err:
            result = f"Error: {err}"
        return result
    return "No audio provided"
# Build the interface: audio inputs and the Convert button in the first
# column, the transcription text box in the second.
with gr.Blocks(title="Quran ASR") as demo:
    gr.Markdown(
        "# Quran ASR — Diacritized Transcription\n"
        "Upload or record audio, then press Convert."
    )
    with gr.Row():
        with gr.Column():
            upload = gr.Audio(type="filepath", label="Upload Audio")
            mic = gr.Audio(type="filepath", label="Microphone Recording")
            btn = gr.Button("Convert")
        with gr.Column():
            out = gr.Textbox(label="Output Text", lines=10)
    # Pressing Convert sends both audio paths to run() and shows its result.
    btn.click(fn=run, inputs=[upload, mic], outputs=[out])

# Start the web server for the app.
demo.launch()
|