File size: 1,974 Bytes
7505690
58ed92a
b2cde20
7505690
 
 
58ed92a
7505690
 
 
58ed92a
7505690
 
 
 
 
 
 
58ed92a
7505690
b2cde20
7505690
30f4a9a
7505690
30f4a9a
7505690
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30f4a9a
7505690
30f4a9a
7505690
58ed92a
7505690
 
58ed92a
 
 
7505690
 
 
58ed92a
7505690
58ed92a
7505690
58ed92a
7505690
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
import torch
import librosa
import soundfile as sf
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
import gradio as gr

# Model identifier handed to both from_pretrained() calls in load_model()
# (looks like a Hugging Face Hub repo id -- confirm it is still published).
MODEL_ID = "xLeonSTES/quran-to-text-base"
# Sampling rate, in Hz, that audio is resampled to and that is reported to the
# processor when building model inputs.
SAMPLE_RATE = 16000
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

@torch.no_grad()
def load_model():
    """Load the processor and the speech seq2seq model named by MODEL_ID.

    The model is moved to DEVICE and switched to evaluation mode before
    being handed back.

    Returns:
        A ``(processor, model)`` tuple.
    """
    asr_processor = AutoProcessor.from_pretrained(MODEL_ID)
    # .to() hands back the module itself, so the move can be chained onto
    # the load.
    asr_model = AutoModelForSpeechSeq2Seq.from_pretrained(MODEL_ID).to(DEVICE)
    asr_model.eval()
    return asr_processor, asr_model

# Load once at import time; transcribe_audio() reads these module-level
# globals on every request instead of reloading the model.
processor, model = load_model()

def resample_to_16k(path):
    """Read an audio file and return it as 1-D samples at SAMPLE_RATE.

    Args:
        path: Location of the audio file, passed straight to ``sf.read``.

    Returns:
        A ``(samples, SAMPLE_RATE)`` tuple. Input with more than one
        dimension is averaged over axis 1 (the channel axis in soundfile's
        frames-by-channels layout). The samples are only converted to
        float32 and resampled when the file's rate differs from SAMPLE_RATE;
        otherwise they are returned as read.
    """
    samples, native_rate = sf.read(path)
    mono = samples.mean(axis=1) if samples.ndim > 1 else samples

    # Already at the target rate: nothing to resample.
    if native_rate == SAMPLE_RATE:
        return mono, SAMPLE_RATE

    resampled = librosa.resample(
        mono.astype('float32'),
        orig_sr=native_rate,
        target_sr=SAMPLE_RATE,
    )
    return resampled, SAMPLE_RATE

def transcribe_audio(path):
    """Transcribe the audio file at *path* and return the decoded text.

    The audio is loaded through ``resample_to_16k``, peak-normalized, turned
    into model inputs by the module-level ``processor`` and decoded with the
    module-level ``model`` running on DEVICE.

    Args:
        path: Location of the audio file to transcribe.

    Returns:
        The first decoded sequence, with special tokens stripped.

    Raises:
        ValueError: If the file contains no audio samples.
    """
    audio, _ = resample_to_16k(path)
    if audio.size == 0:
        # Fail with a readable message instead of an opaque reduction error.
        raise ValueError("Audio contains no samples")

    # Peak-normalize. Use the array's own vectorized reduction: the builtin
    # max() would walk the samples one by one in Python, which is very slow
    # for long recordings. The epsilon avoids dividing by zero on silence.
    peak = abs(audio).max()
    audio = audio / (peak + 1e-9)

    inputs = processor(audio, sampling_rate=SAMPLE_RATE, return_tensors="pt")
    input_features = inputs.input_features.to(DEVICE)

    # Inference only: no gradients are needed for generation.
    with torch.no_grad():
        generated_ids = model.generate(input_features)

    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

def run(uploaded_audio, mic_audio):
    """Button callback: transcribe whichever audio input was supplied.

    Args:
        uploaded_audio: File path from the upload widget, or a falsy value.
        mic_audio: File path from the microphone widget, or a falsy value.
            Takes priority over ``uploaded_audio`` when both are set.

    Returns:
        The transcription, ``"No audio provided"`` when neither input is
        set, or ``"Error: <message>"`` if transcription raised.
    """
    source = uploaded_audio if not mic_audio else mic_audio
    if source:
        # Any failure is reported back to the UI as text rather than raised.
        try:
            outcome = transcribe_audio(source)
        except Exception as exc:
            outcome = f"Error: {exc}"
        return outcome
    return "No audio provided"

# Build the UI: two audio inputs and a button in one column, the text output
# in a second column of the same row.
with gr.Blocks(title="Quran ASR") as demo:
    gr.Markdown("# Quran ASR — Diacritized Transcription\nUpload or record audio, then press Convert.")

    with gr.Row():
        with gr.Column():
            # Both widgets hand run() a file path (type="filepath").
            upload = gr.Audio(type="filepath", label="Upload Audio")
            mic = gr.Audio(type="filepath", label="Microphone Recording")
            btn = gr.Button("Convert")
        with gr.Column():
            out = gr.Textbox(label="Output Text", lines=10)

    # Input order matches run(uploaded_audio, mic_audio).
    btn.click(run, inputs=[upload, mic], outputs=[out])

# Launches the app whenever this module is executed or imported (no
# __main__ guard).
demo.launch()