mark-muhammad committed on
Commit
7b51de6
·
1 Parent(s): f086138

Add initial implementation of ASR for Surah Al-Fatihah with Gradio interface

Browse files
Files changed (2) hide show
  1. app.py +185 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import difflib
3
+ from transformers import pipeline
4
+ import unicodedata
5
+
6
# Initialize the ASR pipeline (model loaded once at startup, not per request).
# Whisper-base checkpoint fine-tuned for Quranic Arabic recitation.
asr_pipeline = pipeline("automatic-speech-recognition", model="tarteel-ai/whisper-base-ar-quran")
8
+
9
# Ground truth for Surah Al-Fatihah, one entry per ayah (1-7).
# NOTE: ayah 3 originally spelled its alefs with alef wasla (U+0671),
# while every other ayah uses the plain alef (U+0627). remove_diacritics()
# (NFKD) does not fold U+0671 to U+0627, so the mixed spelling made ayah 3
# effectively unmatchable against typical ASR output; it is normalized here
# to the same spelling used in ayah 1.
fateha_ayahs = {
    1: "بِسْمِ اللَّهِ الرَّحْمَنِ الرَّحِيمِ",
    2: "الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ",
    3: "الرَّحْمَنِ الرَّحِيمِ",
    4: "مَالِكِ يَوْمِ الدِّينِ",
    5: "إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ",
    6: "اهْدِنَا الصِّرَاطَ الْمُسْتَقِيمَ",
    7: "صِرَاطَ الَّذِينَ أَنْعَمْتَ عَلَيْهِمْ غَيْرِ الْمَغْضُوبِ عَلَيْهِمْ وَلَا الضَّالِّينَ"
}
19
+
20
def remove_diacritics(text: str) -> str:
    """Strip Arabic diacritical marks (harakat) from *text*.

    The string is decomposed with Unicode NFKD normalization so each
    combining mark becomes its own code point, then every combining
    character is dropped.
    """
    decomposed = unicodedata.normalize('NFKD', text)
    return ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
24
+
25
def compare_texts(ref: str, hyp: str, ignore_diacritics: bool = True):
    """Align the reference (ground truth) against the hypothesis (ASR output)
    word-by-word and classify the differences.

    Detects:
    - Missed words: present in ref but absent from hyp.
    - Incorrect words: substitutions (ref word replaced by a different word).
    - Extra words: words in hyp with no counterpart in ref.

    Returns a 4-tuple:
    - highlighted_str: the transcription as HTML, wrong/extra words in red.
    - missed: list of missed words.
    - incorrect: list of (expected, produced) pairs for substitutions.
    - extra: list of extra words.
    """
    if ignore_diacritics:
        ref = remove_diacritics(ref)
        hyp = remove_diacritics(hyp)

    ref_words = ref.split()
    hyp_words = hyp.split()

    def as_error(word: str) -> str:
        # Mark a mismatching word in red for the HTML rendering.
        return f"<span style='color:red'>{word}</span>"

    rendered = []
    missed = []
    incorrect = []
    extra = []

    opcodes = difflib.SequenceMatcher(None, ref_words, hyp_words).get_opcodes()
    for tag, i1, i2, j1, j2 in opcodes:
        if tag == "equal":
            rendered += hyp_words[j1:j2]
        elif tag == "delete":
            missed += ref_words[i1:i2]
        elif tag == "insert":
            for word in hyp_words[j1:j2]:
                rendered.append(as_error(word))
                extra.append(word)
        else:  # "replace": pair words positionally, then classify leftovers
            paired = min(i2 - i1, j2 - j1)
            expected_part = ref_words[i1:i1 + paired]
            produced_part = hyp_words[j1:j1 + paired]
            for expected, produced in zip(expected_part, produced_part):
                rendered.append(as_error(produced))
                incorrect.append((expected, produced))
            # Unpaired reference words were missed; unpaired hypothesis
            # words are extras.
            missed += ref_words[i1 + paired:i2]
            for word in hyp_words[j1 + paired:j2]:
                rendered.append(as_error(word))
                extra.append(word)

    return " ".join(rendered), missed, incorrect, extra
80
+
81
def process_audio(verse_from, verse_to, audio_file):
    """Transcribe a recitation and compare it against Surah Al-Fatihah.

    Args:
        verse_from: First ayah of the expected range (1-7). May arrive as a
            string because the dropdowns set allow_custom_value=True.
        verse_to: Last ayah of the expected range (1-7), inclusive.
        audio_file: Path to the uploaded/recorded audio (gr.Audio uses
            type="filepath"), or None when nothing was provided.

    Returns:
        An HTML report containing the ground truth, the raw transcription,
        the transcription with mismatches highlighted in red, and the lists
        of missed / incorrect / extra words — or an HTML error message for
        invalid input.
    """
    print("[PROCESS] Initializing...")
    verse_from = int(verse_from)
    verse_to = int(verse_to)

    if verse_from not in fateha_ayahs or verse_to not in fateha_ayahs:
        return "<p style='color:red'>Invalid verse number. Please choose a number between 1 and 7.</p>"
    # Guard a reversed range: previously this silently produced an empty
    # ground truth, flagging every transcribed word as "extra".
    if verse_from > verse_to:
        return "<p style='color:red'>Invalid verse range. The starting ayah must not be greater than the ending ayah.</p>"
    # Guard a missing recording: the ASR pipeline raises on None input.
    if audio_file is None:
        return "<p style='color:red'>No audio provided. Please upload a file or record with the microphone.</p>"

    verse_number = f"{verse_from}" if verse_from == verse_to else f"{verse_from} - {verse_to}"
    print(f"[PROCESS] Processing ayah: {verse_number}")

    # Join the requested ayahs into one reference string (the original
    # concatenation loop left a stray leading space in the displayed text).
    ground_truth = " ".join(fateha_ayahs[n] for n in range(verse_from, verse_to + 1))
    print(f"[PROCESS] Ayah ref: {ground_truth}")

    # audio_file is a file path because gr.Audio uses type="filepath".
    result = asr_pipeline(audio_file)
    print(f"[PROCESS] Result: {result}")
    transcription = result["text"]

    # Diacritics are kept (ignore_diacritics=False): the model emits fully
    # vocalized text, so the comparison is strict.
    highlighted_transcription, missed, incorrect, extra = compare_texts(
        ground_truth, transcription, ignore_diacritics=False
    )

    html_output = f"""
    <html>
    <head>
    <style>
    body {{ font-family: Arial, sans-serif; margin: 20px; }}
    table, th, td {{ border: 1px solid #ccc; border-collapse: collapse; padding: 8px; }}
    </style>
    </head>
    <body>
    <h2>Ground Truth (Verse {verse_number}):</h2>
    <p>{ground_truth}</p>
    <h2>Model Transcription:</h2>
    <p>{transcription}</p>
    <h2>Highlighted Transcription (mismatches in red):</h2>
    <p>{highlighted_transcription}</p>
    <h2>Differences:</h2>
    <p><strong>Missed Words:</strong> {" ".join(missed) if missed else "None"}</p>
    <p><strong>Incorrect Words (Expected -> Produced):</strong> {"; ".join([f"{exp} -> {prod}" for exp, prod in incorrect]) if incorrect else "None"}</p>
    <p><strong>Extra Words:</strong> {" ".join(extra) if extra else "None"}</p>
    </body>
    </html>
    """
    return html_output
133
+
134
def update_verse_to(verse_from):
    """Restrict the "Hingga ayah" dropdown to ayahs >= the selected start.

    Keeps the range valid by snapping the "to" value to the new starting
    ayah and offering only ayahs from it through 7.
    """
    # allow_custom_value=True means a typed value can arrive as a str,
    # which crashed the original `n <= 7` comparison — coerce first.
    verse_from = int(verse_from)
    # range() replaces the original hand-rolled while-loop accumulation.
    return gr.update(choices=list(range(verse_from, 8)), value=verse_from, interactive=True)
141
+
142
# --- Gradio user interface ---
# Layout: left column holds the ayah-range selectors, audio input, and the
# submit button; right column shows the HTML analysis report.
with gr.Blocks(title="ASR Surah Al-Fatihah") as demo:
    # Centered page title.
    gr.HTML(
        f"""
        <div style="text-align: center;">
        <h1 style="margin-bottom: 0;">ASR Surah Al-Fatihah</h1>
        </div>
        """
    )
    # Subtitle (Indonesian): "Demo for checking Al-Fatihah recitation".
    gr.Markdown("Demo pengecekan bacaan Al-Fatihah")
    with gr.Row():
        with gr.Column():
            with gr.Row():
                # Starting ayah (1-7). "Dari ayah" = "From ayah".
                # allow_custom_value lets the user type a number; typed
                # values arrive as str — process_audio int()s them.
                a_from = gr.Dropdown(
                    choices=list(fateha_ayahs.keys()),
                    value=1,
                    label="Dari ayah",
                    interactive=True,
                    allow_custom_value=True
                )
                # Ending ayah (inclusive). "Hingga ayah" = "Up to ayah".
                a_to = gr.Dropdown(
                    choices=list(fateha_ayahs.keys()),
                    value=1,
                    label="Hingga ayah",
                    interactive=True,
                    allow_custom_value=True
                )
            # When the start ayah changes, narrow the "to" choices so the
            # selected range stays valid (start..7).
            a_from.change(
                fn=update_verse_to,
                inputs=[a_from],
                outputs=[a_to]
            )
            # Audio input via upload or microphone; type="filepath" hands
            # process_audio a path on disk. Label (Indonesian): "Upload a
            # file or record with the microphone".
            audio = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Unggah file atau rekam dengan mikrofon")
            # "Kirim" = "Submit".
            btn = gr.Button("Kirim", variant="primary")
        with gr.Column():
            # "Hasil Analisis" = "Analysis result": the HTML report from
            # process_audio.
            output = gr.HTML(label="Hasil Analisis")
            btn.click(
                fn=process_audio,
                inputs=[a_from, a_to, audio],
                outputs=[output]
            )
182
+
183
# Launch the app only when executed as a script (not on import).
if __name__ == "__main__":
    # NOTE(review): share=True opens a public Gradio tunnel link; set to
    # False for local-only use.
    demo.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ transformers
3
+ torch