Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import difflib | |
| from transformers import pipeline | |
| import unicodedata | |
# Speech-recognition pipeline, created once at import time so that every
# request handled by this module reuses the same loaded model.
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="tarteel-ai/whisper-base-ar-quran",
)
# Reference text for Surah Al-Fatiha, keyed by ayah number (1-7).
# NOTE(review): these literals were restored from a mis-encoded copy of the
# file (UTF-8 bytes decoded with the wrong code page). The letter/diacritic
# counts match the garbled original, but the exact vocalisation should be
# confirmed against an authoritative source before relying on it.
fateha_ayahs = {
    1: "بِسْمِ اللَّهِ الرَّحْمَنِ الرَّحِيمِ",
    2: "الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ",
    3: "ٱلرَّحْمَنِ ٱلرَّحِيمِ",
    4: "مَالِكِ يَوْمِ الدِّينِ",
    5: "إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ",
    6: "اهْدِنَا الصِّرَاطَ الْمُسْتَقِيمَ",
    7: "صِرَاطَ الَّذِينَ أَنْعَمْتَ عَلَيْهِمْ غَيْرِ الْمَغْضُوبِ عَلَيْهِمْ وَلَا الضَّالِّينَ",
}
def remove_diacritics(text: str) -> str:
    """Strip combining marks (such as Arabic tashkeel) from *text*.

    The input is decomposed with NFKD so that marks folded into precomposed
    characters become separate code points; every code point whose canonical
    combining class is non-zero is then dropped. The result stays in its
    decomposed (NFKD) form.
    """
    decomposed = unicodedata.normalize("NFKD", text)
    base_chars = (ch for ch in decomposed if unicodedata.combining(ch) == 0)
    return "".join(base_chars)
def compare_texts(ref: str, hyp: str, ignore_diacritics: bool = True):
    """Diff the ASR hypothesis against the reference text, word by word.

    Both texts are split on whitespace and aligned with
    ``difflib.SequenceMatcher``. Each aligned region is classified as:

    - missed: words present in ``ref`` but absent from ``hyp``;
    - incorrect: words substituted by a different word (paired by position);
    - extra: words that appear only in ``hyp``.

    Args:
        ref: Reference (ground truth) text.
        hyp: Hypothesis (ASR output) text.
        ignore_diacritics: When true, both texts are passed through
            ``remove_diacritics`` before being compared.

    Returns:
        A 4-tuple ``(highlighted_str, missed, incorrect, extra)`` where
        ``highlighted_str`` is the hypothesis with wrong/extra words wrapped
        in a red HTML ``<span>``, ``missed`` and ``extra`` are lists of words,
        and ``incorrect`` is a list of ``(expected, produced)`` tuples.
    """
    if ignore_diacritics:
        ref, hyp = remove_diacritics(ref), remove_diacritics(hyp)

    expected = ref.split()
    produced = hyp.split()

    def flag(word):
        # Mark a hypothesis word as a mismatch in the rendered output.
        return f"<span style='color:red'>{word}</span>"

    rendered, missed, incorrect, extra = [], [], [], []
    aligner = difflib.SequenceMatcher(None, expected, produced)

    for op, r_lo, r_hi, h_lo, h_hi in aligner.get_opcodes():
        ref_chunk = expected[r_lo:r_hi]
        hyp_chunk = produced[h_lo:h_hi]

        if op == "equal":
            rendered += hyp_chunk
        elif op == "delete":
            missed += ref_chunk
        elif op == "insert":
            rendered += [flag(word) for word in hyp_chunk]
            extra += hyp_chunk
        elif op == "replace":
            # Pair words positionally; zip stops at the shorter side, so the
            # leftovers are either missed (reference) or extra (hypothesis).
            pairs = list(zip(ref_chunk, hyp_chunk))
            incorrect += pairs
            rendered += [flag(got) for _, got in pairs]
            missed += ref_chunk[len(pairs):]
            surplus = hyp_chunk[len(pairs):]
            rendered += [flag(word) for word in surplus]
            extra += surplus

    return " ".join(rendered), missed, incorrect, extra
def process_audio(verse_from, verse_to, audio_file):
    """Transcribe a recitation and report how it differs from the reference.

    Args:
        verse_from: First ayah of the recited range (anything ``int()`` accepts).
        verse_to: Last ayah of the recited range, inclusive.
        audio_file: Path to the recording (the audio widget uses
            ``type="filepath"``); ``None``/empty when nothing was provided.

    Returns:
        An HTML string: either a red error paragraph (missing audio, invalid
        verse number, reversed range) or the full comparison report with the
        reference text, the raw transcription, the highlighted transcription
        and the lists of missed / incorrect / extra words.
    """
    print("[PROCESS] Initializing...")

    # Submitting without a recording would otherwise crash inside the pipeline.
    if not audio_file:
        return "<p style='color:red'>No audio provided. Please upload a file or record with the microphone.</p>"

    invalid_verse = "<p style='color:red'>Invalid verse number. Please choose a number between 1 and 7.</p>"
    # The dropdowns accept custom values, so the inputs may not be numeric.
    try:
        verse_from = int(verse_from)
        verse_to = int(verse_to)
    except (TypeError, ValueError):
        return invalid_verse

    if verse_from not in fateha_ayahs or verse_to not in fateha_ayahs:
        return invalid_verse
    # A reversed range would silently yield an empty reference text.
    if verse_from > verse_to:
        return "<p style='color:red'>Invalid verse range. The last verse must not come before the first.</p>"

    verse_number = f"{verse_from}" if verse_from == verse_to else f"{verse_from} - {verse_to}"
    print(f"[PROCESS] Processing ayah: {verse_number}")

    ground_truth = " ".join(fateha_ayahs[n] for n in range(verse_from, verse_to + 1))
    print(f"[PROCESS] Ayah ref: {ground_truth}")

    # audio_file is a file path because we use type="filepath"
    result = asr_pipeline(audio_file)
    print(f"[PROCESS] Result: {result}")
    transcription = result["text"]

    highlighted_transcription, missed, incorrect, extra = compare_texts(
        ground_truth, transcription, ignore_diacritics=False
    )

    missed_text = " ".join(missed) if missed else "None"
    incorrect_text = (
        "; ".join(f"{exp} -> {prod}" for exp, prod in incorrect) if incorrect else "None"
    )
    extra_text = " ".join(extra) if extra else "None"

    html_output = f"""
    <html>
    <head>
    <style>
    body {{ font-family: Arial, sans-serif; margin: 20px; }}
    table, th, td {{ border: 1px solid #ccc; border-collapse: collapse; padding: 8px; }}
    </style>
    </head>
    <body>
    <h2>Ground Truth (Verse {verse_number}):</h2>
    <p>{ground_truth}</p>
    <h2>Model Transcription:</h2>
    <p>{transcription}</p>
    <h2>Highlighted Transcription (mismatches in red):</h2>
    <p>{highlighted_transcription}</p>
    <h2>Differences:</h2>
    <p><strong>Missed Words:</strong> {missed_text}</p>
    <p><strong>Incorrect Words (Expected -> Produced):</strong> {incorrect_text}</p>
    <p><strong>Extra Words:</strong> {extra_text}</p>
    </body>
    </html>
    """
    return html_output
def update_verse_to(verse_from):
    """Restrict the end-verse dropdown to verses at or after the start verse.

    Args:
        verse_from: Current value of the start-verse dropdown. It may be a
            string, because that dropdown allows custom values.

    Returns:
        A ``gr.update`` payload that sets the end-verse choices to
        ``verse_from .. last verse`` and selects ``verse_from``. When
        ``verse_from`` is not numeric, an empty update is returned so the
        end-verse dropdown is left unchanged.
    """
    try:
        start = int(verse_from)
    except (TypeError, ValueError):
        # Free-text input that is not a verse number: change nothing here and
        # let the submit handler report the invalid value.
        return gr.update()

    # Derive the upper bound from the reference data instead of hard-coding it.
    last_verse = max(fateha_ayahs)
    return gr.update(
        choices=list(range(start, last_verse + 1)),
        value=start,
        interactive=True,
    )
# Gradio UI: a first column with the verse-range dropdowns, the audio input
# and the submit button, and a second column that shows the HTML report.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost -- confirm the layout against the running app.
with gr.Blocks(title="ASR Surah Al-Fatihah") as demo:
    # Centered page heading.
    gr.HTML(
        f"""
        <div style="text-align: center;">
        <h1 style="margin-bottom: 0;">ASR Surah Al-Fatihah</h1>
        </div>
        """
    )
    gr.Markdown("Demo pengecekan bacaan Al-Fatihah")
    with gr.Row():
        with gr.Column():
            with gr.Row():
                # Start of the recited verse range.
                a_from = gr.Dropdown(
                    choices=list(fateha_ayahs.keys()),
                    value=1,
                    label="Dari ayah",
                    interactive=True,
                    allow_custom_value=True
                )
                # End of the recited verse range (inclusive).
                a_to = gr.Dropdown(
                    choices=list(fateha_ayahs.keys()),
                    value=1,
                    label="Hingga ayah",
                    interactive=True,
                    allow_custom_value=True
                )
            # Whenever the start verse changes, refresh the end-verse dropdown
            # via update_verse_to.
            a_from.change(
                fn=update_verse_to,
                inputs=[a_from],
                outputs=[a_to]
            )
            # type="filepath" means process_audio receives a path to the recording.
            audio = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Unggah file atau rekam dengan mikrofon")
            btn = gr.Button("Kirim", variant="primary")
        with gr.Column():
            output = gr.HTML(label="Hasil Analisis")
    # Submit: run process_audio on the selected range and recording, and
    # render its HTML result in the output panel.
    btn.click(
        fn=process_audio,
        inputs=[a_from, a_to, audio],
        outputs=[output]
    )
# Launch the app only when this file is executed as a script, not on import.
# share=True is passed through to Gradio's launch().
if __name__ == "__main__":
    demo.launch(share=True)