Spaces:

mark-muhammad
/

ASR-Quran

Sleeping

App Files Files Community

mark-muhammad committed on Jan 28

Commit

7b51de6

1 Parent(s): f086138

Add initial implementation of ASR for Surah Al-Fatihah with Gradio interface

Browse files

Files changed (2) hide show

app.py +185 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,185 @@

import difflib
import html
import unicodedata

import gradio as gr
from transformers import pipeline
# Build the speech-recognition pipeline a single time, at import, so that
# every request reuses the same loaded model instead of reloading it.
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="tarteel-ai/whisper-base-ar-quran",
)
# Ground truth for Surah Al-Fatiha, keyed by ayah number (1-7).
# Every ayah spells the definite article with a plain alef (U+0627).  Ayah 3
# repeats the last two words of ayah 1, so it must use the same spelling:
# with an exact word comparison, a transcription cannot match two different
# spellings of the same spoken words.
fateha_ayahs = {
    1: "بِسْمِ اللَّهِ الرَّحْمَنِ الرَّحِيمِ",
    2: "الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ",
    3: "الرَّحْمَنِ الرَّحِيمِ",
    4: "مَالِكِ يَوْمِ الدِّينِ",
    5: "إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ",
    6: "اهْدِنَا الصِّرَاطَ الْمُسْتَقِيمَ",
    7: "صِرَاطَ الَّذِينَ أَنْعَمْتَ عَلَيْهِمْ غَيْرِ الْمَغْضُوبِ عَلَيْهِمْ وَلَا الضَّالِّينَ",
}
# Combining marks that Unicode decomposition splits off the hamza/madda
# letters (U+0622..U+0626).  They belong to the letter itself rather than to
# its vocalisation, so they must survive diacritic removal.
_ARABIC_LETTER_MARKS = frozenset("\u0653\u0654\u0655")


def remove_diacritics(text: str) -> str:
    """Return *text* with diacritics removed but hamza/madda letters intact.

    The text is decomposed (NFKD) so every diacritic becomes a separate
    combining character, the combining characters are dropped, and the result
    is recomposed (NFC).  Maddah above, hamza above and hamza below are kept
    during the filtering step; otherwise a letter such as U+0623 (alef with
    hamza above) would be reduced to a bare alef, which is a different
    letter rather than the same letter without vocalisation.

    Args:
        text: Any string; an empty string is returned unchanged.

    Returns:
        The stripped text in NFC form.
    """
    decomposed = unicodedata.normalize("NFKD", text)
    kept = "".join(
        ch
        for ch in decomposed
        if not unicodedata.combining(ch) or ch in _ARABIC_LETTER_MARKS
    )
    # Recompose so that "alef + hamza above" becomes the single letter again.
    return unicodedata.normalize("NFC", kept)
def _highlight_wrong(word: str) -> str:
    """Wrap *word*, HTML-escaped, in the red span that marks a mismatch."""
    return f"<span style='color:red'>{html.escape(word)}</span>"


def compare_texts(ref: str, hyp: str, ignore_diacritics: bool = True):
    """Compare reference and hypothesis texts word by word.

    Both texts are split on whitespace and aligned with
    ``difflib.SequenceMatcher``.  Within a "replace" region the words are
    paired positionally; leftover reference words count as missed and
    leftover hypothesis words as extra.

    Args:
        ref: The ground-truth text.
        hyp: The text to check (ASR output).
        ignore_diacritics: When true, both texts go through
            ``remove_diacritics`` first, and every returned word is the
            stripped form.

    Returns:
        A 4-tuple ``(highlighted_str, missed, incorrect, extra)``:

        - ``highlighted_str``: the hypothesis words joined by single spaces,
          as HTML.  Every word is HTML-escaped, and substituted or inserted
          words are wrapped in a red ``<span>``.
        - ``missed``: reference words absent from the hypothesis.
        - ``incorrect``: ``(expected, produced)`` tuples for substitutions.
        - ``extra``: hypothesis words with no reference counterpart.

        The three lists hold raw, unescaped words.
    """
    if ignore_diacritics:
        ref_words = remove_diacritics(ref).split()
        hyp_words = remove_diacritics(hyp).split()
    else:
        ref_words = ref.split()
        hyp_words = hyp.split()

    highlighted = []
    missed = []
    incorrect = []
    extra = []

    matcher = difflib.SequenceMatcher(None, ref_words, hyp_words)
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        ref_chunk = ref_words[i1:i2]
        hyp_chunk = hyp_words[j1:j2]
        if tag == "equal":
            # Escaped as well: the result is HTML, so no word may carry markup.
            highlighted.extend(html.escape(word) for word in hyp_chunk)
            continue

        # "replace", "delete" and "insert" share one rule: pair words up
        # positionally, then whatever is left on either side is missed/extra.
        # For "delete" the hypothesis chunk is empty, for "insert" the
        # reference chunk is.
        paired = min(len(ref_chunk), len(hyp_chunk))
        incorrect.extend(zip(ref_chunk[:paired], hyp_chunk[:paired]))
        missed.extend(ref_chunk[paired:])
        extra.extend(hyp_chunk[paired:])
        highlighted.extend(_highlight_wrong(word) for word in hyp_chunk)

    return " ".join(highlighted), missed, incorrect, extra
def process_audio(verse_from, verse_to, audio_file):
    """Transcribe a recitation and report how it differs from the chosen ayahs.

    Args:
        verse_from: First ayah of the range; any value ``int()`` accepts.
        verse_to: Last ayah of the range, inclusive; any value ``int()``
            accepts.
        audio_file: Path of the recording, handed unchanged to
            ``asr_pipeline``.

    Returns:
        An HTML string.  On success it is the comparison report; otherwise it
        is a short red paragraph explaining which input was rejected (bad
        verse number, reversed range, or missing audio).
    """
    invalid_verse = (
        "<p style='color:red'>Invalid verse number. "
        "Please choose a number between 1 and 7.</p>"
    )
    print("[PROCESS] Initializing...")

    # The dropdowns accept custom values, so the inputs may be empty or
    # non-numeric; report that instead of raising inside the request.
    try:
        verse_from = int(verse_from)
        verse_to = int(verse_to)
    except (TypeError, ValueError):
        return invalid_verse
    if verse_from not in fateha_ayahs or verse_to not in fateha_ayahs:
        return invalid_verse
    if verse_from > verse_to:
        # A reversed range would produce an empty reference and flag every
        # transcribed word as extra.
        return (
            "<p style='color:red'>Invalid verse range. "
            "The first verse must not come after the last verse.</p>"
        )
    if not audio_file:
        return (
            "<p style='color:red'>No audio provided. "
            "Please upload or record a recitation first.</p>"
        )

    verse_number = f"{verse_from}" if verse_from == verse_to else f"{verse_from} - {verse_to}"
    print(f"[PROCESS] Processing ayah: {verse_number}")
    ground_truth = " ".join(
        fateha_ayahs[n] for n in range(verse_from, verse_to + 1)
    )
    print(f"[PROCESS] Ayah ref: {ground_truth}")

    # audio_file is a file path because we use type="filepath"
    result = asr_pipeline(audio_file)
    print(f"[PROCESS] Result: {result}")
    transcription = result["text"]

    highlighted_transcription, missed, incorrect, extra = compare_texts(
        ground_truth, transcription, ignore_diacritics=False
    )

    # Plain text is escaped before it goes into the report.  The highlighted
    # transcription is inserted as-is because it is already HTML.
    missed_text = html.escape(" ".join(missed)) if missed else "None"
    incorrect_text = (
        "; ".join(
            f"{html.escape(exp)} -> {html.escape(prod)}" for exp, prod in incorrect
        )
        if incorrect
        else "None"
    )
    extra_text = html.escape(" ".join(extra)) if extra else "None"

    html_output = f"""
    <html>
      <head>
        <style>
          body {{ font-family: Arial, sans-serif; margin: 20px; }}
          table, th, td {{ border: 1px solid #ccc; border-collapse: collapse; padding: 8px; }}
        </style>
      </head>
      <body>
        <h2>Ground Truth (Verse {verse_number}):</h2>
        <p>{html.escape(ground_truth)}</p>
        <h2>Model Transcription:</h2>
        <p>{html.escape(transcription)}</p>
        <h2>Highlighted Transcription (mismatches in red):</h2>
        <p>{highlighted_transcription}</p>
        <h2>Differences:</h2>
        <p><strong>Missed Words:</strong> {missed_text}</p>
        <p><strong>Incorrect Words (Expected -> Produced):</strong> {incorrect_text}</p>
        <p><strong>Extra Words:</strong> {extra_text}</p>
      </body>
    </html>
    """
    return html_output
def update_verse_to(verse_from):
    """Restrict the end-verse dropdown to ayahs at or after *verse_from*.

    Args:
        verse_from: The value currently selected as the first ayah.  It may
            be a string or ``None``, because the dropdown accepts custom
            values.

    Returns:
        A ``gr.update`` that sets the choices to ``verse_from`` through the
        last ayah in ``fateha_ayahs`` and selects ``verse_from``.  If the
        value is not a number, an empty update is returned so the dropdown
        stays as it is.
    """
    try:
        first = int(verse_from)
    except (TypeError, ValueError):
        return gr.update()
    # Take the upper bound from the ground-truth table, not a literal 7.
    last = max(fateha_ayahs)
    return gr.update(
        choices=list(range(first, last + 1)),
        value=first,
        interactive=True,
    )
# Centered page title, rendered through gr.HTML at the top of the app.
_HEADER_HTML = """
            <div style="text-align: center;">
                <h1 style="margin-bottom: 0;">ASR Surah Al-Fatihah</h1>
            </div>
            """


def _ayah_dropdown(label):
    """Create a dropdown offering every ayah number, with ayah 1 selected."""
    return gr.Dropdown(
        choices=list(fateha_ayahs),
        value=1,
        label=label,
        interactive=True,
        allow_custom_value=True,
    )


with gr.Blocks(title="ASR Surah Al-Fatihah") as demo:
    gr.HTML(_HEADER_HTML)
    gr.Markdown("Demo pengecekan bacaan Al-Fatihah")
    with gr.Row():
        # Left column: verse range, audio input and the submit button.
        with gr.Column():
            with gr.Row():
                a_from = _ayah_dropdown("Dari ayah")
                a_to = _ayah_dropdown("Hingga ayah")
                # Changing the start verse refreshes the end-verse dropdown.
                a_from.change(fn=update_verse_to, inputs=[a_from], outputs=[a_to])
            audio = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",
                label="Unggah file atau rekam dengan mikrofon",
            )
            btn = gr.Button("Kirim", variant="primary")
        # Right column: the HTML report returned by process_audio.
        with gr.Column():
            output = gr.HTML(label="Hasil Analisis")
    btn.click(fn=process_audio, inputs=[a_from, a_to, audio], outputs=[output])

# Launch
if __name__ == "__main__":
    demo.launch(share=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio
+transformers
+torch