import gradio as gr
import difflib
from transformers import pipeline
import unicodedata
# Initialize the ASR pipeline at import time so the model is loaded exactly
# once for the lifetime of the app (not per request).
# NOTE(review): model name suggests a Whisper checkpoint fine-tuned for Quran
# recitation — confirm on the Hugging Face model card.
asr_pipeline = pipeline("automatic-speech-recognition", model="tarteel-ai/whisper-base-ar-quran")
# Ground truth for Surah Al-Fatiha, keyed by ayah number (1-7).
# NOTE(review): the ayah strings were mojibake'd and split across lines in the
# original file (broken literals); restored here to the standard fully
# vowelled text — verify the exact diacritics against a trusted mushaf.
fateha_ayahs = {
    1: "بِسْمِ اللَّهِ الرَّحْمَٰنِ الرَّحِيمِ",
    2: "الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ",
    3: "الرَّحْمَٰنِ الرَّحِيمِ",
    4: "مَالِكِ يَوْمِ الدِّينِ",
    5: "إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ",
    6: "اهْدِنَا الصِّرَاطَ الْمُسْتَقِيمَ",
    7: "صِرَاطَ الَّذِينَ أَنْعَمْتَ عَلَيْهِمْ غَيْرِ الْمَغْضُوبِ عَلَيْهِمْ وَلَا الضَّالِّينَ",
}
def remove_diacritics(text: str) -> str:
    """Strip combining marks (e.g. Arabic harakat) via NFKD decomposition.

    The text is decomposed so that each diacritic becomes a separate
    combining codepoint, which is then filtered out.
    """
    decomposed = unicodedata.normalize('NFKD', text)
    return ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
def compare_texts(ref: str, hyp: str, ignore_diacritics: bool = True):
    """Align reference (ground truth) and hypothesis (ASR output) word-by-word.

    Detects:
        - Missed words: present in ref but absent from hyp.
        - Incorrect words: substitutions (expected, produced).
        - Extra words: insertions found only in hyp.

    Returns:
        Tuple of (highlighted_str, missed, incorrect, extra) where
        highlighted_str is the transcription as HTML with wrong/extra words
        wrapped in a red <span>.
    """
    if ignore_diacritics:
        ref = remove_diacritics(ref)
        hyp = remove_diacritics(hyp)
    gt_tokens = ref.split()
    asr_tokens = hyp.split()

    missed = []
    incorrect = []
    extra = []
    rendered = []

    def _mark(word):
        # Red highlight for a wrong or inserted word.
        return f"<span style='color:red'>{word}</span>"

    matcher = difflib.SequenceMatcher(None, gt_tokens, asr_tokens)
    for op, r_lo, r_hi, h_lo, h_hi in matcher.get_opcodes():
        if op == "equal":
            rendered.extend(asr_tokens[h_lo:h_hi])
            continue
        if op == "delete":
            missed.extend(gt_tokens[r_lo:r_hi])
            continue
        if op == "insert":
            for word in asr_tokens[h_lo:h_hi]:
                rendered.append(_mark(word))
                extra.append(word)
            continue
        # "replace": pair words positionally for the overlapping span, then
        # treat any leftover reference words as missed and any leftover
        # hypothesis words as extra (only one side can have leftovers).
        paired = min(r_hi - r_lo, h_hi - h_lo)
        for k in range(paired):
            produced = asr_tokens[h_lo + k]
            rendered.append(_mark(produced))
            incorrect.append((gt_tokens[r_lo + k], produced))
        missed.extend(gt_tokens[r_lo + paired:r_hi])
        for word in asr_tokens[h_lo + paired:h_hi]:
            rendered.append(_mark(word))
            extra.append(word)

    return " ".join(rendered), missed, incorrect, extra
def process_audio(verse_from, verse_to, audio_file):
    """Transcribe a recording and compare it against the selected ayah range.

    Args:
        verse_from: First ayah number (1-7); may arrive as a string from the UI.
        verse_to: Last ayah number (1-7), inclusive.
        audio_file: Filesystem path to the recording (gr.Audio type="filepath").

    Returns:
        An HTML document showing the ground truth, the raw transcription, the
        transcription with mismatches highlighted in red, and the lists of
        missed / incorrect / extra words — or an error paragraph when the
        requested ayah range is invalid.
    """
    print("[PROCESS] Initializing...")
    verse_from = int(verse_from)
    verse_to = int(verse_to)
    if verse_from not in fateha_ayahs or verse_to not in fateha_ayahs:
        return "<p style='color:red'>Invalid verse number. Please choose a number between 1 and 7.</p>"
    if verse_from > verse_to:
        # Guard an inverted range, which would otherwise produce an empty reference.
        return "<p style='color:red'>Invalid verse range. The first ayah must not come after the last.</p>"
    verse_number = f"{verse_from}" if verse_from == verse_to else f"{verse_from} - {verse_to}"
    print(f"[PROCESS] Processing ayah: {verse_number}")
    # Concatenate the selected ayahs into a single reference string
    # (join avoids the leading space the old accumulator loop produced).
    ground_truth = " ".join(fateha_ayahs[n] for n in range(verse_from, verse_to + 1))
    print(f"[PROCESS] Ayah ref: {ground_truth}")
    # audio_file is a file path because gr.Audio is configured with type="filepath".
    result = asr_pipeline(audio_file)
    print(f"[PROCESS] Result: {result}")
    transcription = result["text"]
    # Compare including diacritics (ignore_diacritics=False), as the UI expects
    # the recitation to match the fully vowelled reference.
    highlighted_transcription, missed, incorrect, extra = compare_texts(
        ground_truth, transcription, ignore_diacritics=False
    )
    html_output = f"""
<html>
<head>
<style>
body {{ font-family: Arial, sans-serif; margin: 20px; }}
table, th, td {{ border: 1px solid #ccc; border-collapse: collapse; padding: 8px; }}
</style>
</head>
<body>
<h2>Ground Truth (Verse {verse_number}):</h2>
<p>{ground_truth}</p>
<h2>Model Transcription:</h2>
<p>{transcription}</p>
<h2>Highlighted Transcription (mismatches in red):</h2>
<p>{highlighted_transcription}</p>
<h2>Differences:</h2>
<p><strong>Missed Words:</strong> {" ".join(missed) if missed else "None"}</p>
<p><strong>Incorrect Words (Expected -> Produced):</strong> {"; ".join([f"{exp} -> {prod}" for exp, prod in incorrect]) if incorrect else "None"}</p>
<p><strong>Extra Words:</strong> {" ".join(extra) if extra else "None"}</p>
</body>
</html>
"""
    return html_output
def update_verse_to(verse_from):
    """Restrict the end-ayah dropdown to values >= the chosen start ayah.

    Args:
        verse_from: Ayah number selected in the "Dari ayah" dropdown. Coerced
            to int because the dropdown uses allow_custom_value=True and may
            therefore deliver a string (the old ``n <= 7`` comparison would
            raise TypeError in that case).

    Returns:
        A gr.update limiting the choices to verse_from..7 and resetting the
        selected value to the start ayah.
    """
    start = int(verse_from)
    return gr.update(choices=list(range(start, 8)), value=start, interactive=True)
# ---- Gradio UI wiring ------------------------------------------------------
# NOTE(review): the original file's indentation was lost in extraction; the
# nesting below is the conventional reconstruction — verify against the repo.
with gr.Blocks(title="ASR Surah Al-Fatihah") as demo:
    # Centered page header.
    gr.HTML(
        f"""
<div style="text-align: center;">
<h1 style="margin-bottom: 0;">ASR Surah Al-Fatihah</h1>
</div>
"""
    )
    gr.Markdown("Demo pengecekan bacaan Al-Fatihah")
    with gr.Row():
        with gr.Column():
            with gr.Row():
                # Start/end ayah selectors; choices are the keys 1-7.
                a_from = gr.Dropdown(
                    choices=list(fateha_ayahs.keys()),
                    value=1,
                    label="Dari ayah",
                    interactive=True,
                    allow_custom_value=True
                )
                a_to = gr.Dropdown(
                    choices=list(fateha_ayahs.keys()),
                    value=1,
                    label="Hingga ayah",
                    interactive=True,
                    allow_custom_value=True
                )
            # Keep the end-ayah choices >= the selected start ayah.
            a_from.change(
                fn=update_verse_to,
                inputs=[a_from],
                outputs=[a_to]
            )
            # Audio input: upload a file or record from the microphone;
            # type="filepath" makes process_audio receive a path string.
            audio = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Unggah file atau rekam dengan mikrofon")
            btn = gr.Button("Kirim", variant="primary")
        with gr.Column():
            # Result panel: HTML report produced by process_audio.
            output = gr.HTML(label="Hasil Analisis")
    # Run ASR + comparison when the user clicks "Kirim".
    btn.click(
        fn=process_audio,
        inputs=[a_from, a_to, audio],
        outputs=[output]
    )
# Launch when run as a script; share=True exposes a public Gradio link.
if __name__ == "__main__":
    demo.launch(share=True)