Spaces:

mkfallah
/

pasr

Sleeping

App Files Files Community

mkfallah committed on Sep 4, 2025

Commit

9f5d540

verified ·

1 Parent(s): 68f2a89

Create app.py

Browse files

Files changed (1) hide show

app.py +53 -0

app.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import gradio as gr
+from transformers import pipeline
+from rapidfuzz import process, fuzz
+import tempfile
+import soundfile as sf
+# --- ASR pipeline ---
+# Built once at module level; transcribe() reuses this single instance for
+# every request instead of constructing a pipeline per call.
+asr = pipeline(
+    task="automatic-speech-recognition",
+    model="vhdm/whisper-large-fa-v1",
+    device=-1  # CPU
+)
+# --- Custom vocabulary with multiple forms for accuracy ---
+# Each key is the canonical spelling written into the final transcript; its
+# list holds the variants that replace_fuzzy() searches for in the ASR output.
+custom_vocab_map = {
+    "نرد": ["نرد", "نِرد", "نَرد"],
+    "کامپیوتر": ["کامپیوتر", "کامپیوتره"],
+    "هوش مصنوعی": ["هوش مصنوعی", "هوش صنعتی"],
+    "ماشین": ["ماشین", "ماشینه"]
+}
+def replace_fuzzy(text, vocab_map, threshold=85):
+    """
+    Replace words/phrases in text using fuzzy matching with high threshold.
+    Supports multiple alternatives per word/phrase.
+    """
+    for target, alternatives in vocab_map.items():
+        # find best match among alternatives
+        match, score = process.extractOne(text, alternatives, scorer=fuzz.partial_ratio)
+        if score >= threshold:
+            text = text.replace(match, target)
+    return text
+def transcribe(audio):
+    # audio is a tuple (numpy array, sample_rate)
+    with tempfile.NamedTemporaryFile(suffix=".wav") as tmp:
+        sf.write(tmp.name, audio[0], samplerate=audio[1])
+        # ASR with chunking for long audios
+        result = asr(tmp.name, chunk_length_s=30, stride_length_s=[5,5])
+    text = result["text"]
+    final_text = replace_fuzzy(text, custom_vocab_map, threshold=85)
+    return final_text
+# --- Gradio interface ---
+# Wires transcribe() to a single audio input and a plain-text output.
+iface = gr.Interface(
+    fn=transcribe,
+    inputs=gr.Audio(type="numpy"),
+    outputs="text",
+    title="Persian ASR with High Accuracy Vocabulary",
+    description="Upload a Persian audio file; recognized words are corrected using a custom high-accuracy vocabulary."
+)
+# Called at module level, so the UI starts as soon as this file is executed.
+iface.launch()