recitation-segmenter-app-API

Runtime error

App Files Files Community

aboalaa147 committed on Dec 19, 2025

Commit

cb16cfc

verified ·

1 Parent(s): b0b0b0f

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -33

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import numpy as np
 import torch
 import soundfile as sf
 import librosa
-from matplotlib import pyplot as plt
 from transformers import AutoFeatureExtractor, AutoModelForAudioFrameClassification
 from recitations_segmenter import segment_recitations, clean_speech_intervals
 import io
@@ -15,6 +14,9 @@ import zipfile
 # 🔹 ASR client
 from gradio_client import Client, handle_file
 # ======================
 # Setup device and model
 # ======================
@@ -50,6 +52,7 @@ def get_interval(x, intervals, idx, sr=16000):
     return x[start:end]
 def plot_signal(x, intervals, sr=16000):
     fig, ax = plt.subplots(figsize=(20, 4))
     if isinstance(x, torch.Tensor):
         x = x.numpy()
@@ -58,7 +61,6 @@ def plot_signal(x, intervals, sr=16000):
         ax.axvline(x=s * sr, color='red', alpha=0.4)
         ax.axvline(x=e * sr, color='red', alpha=0.4)
     plt.tight_layout()
     buf = io.BytesIO()
     plt.savefig(buf, format="png")
     buf.seek(0)
@@ -69,9 +71,9 @@ def plot_signal(x, intervals, sr=16000):
 # ======================
 # Main processing
 # ======================
-def process_audio(audio_file, min_silence_ms, min_speech_ms, pad_ms):
     if audio_file is None:
-        return None, "⚠️ ارفع ملف صوتي", None, []
     try:
         wav = read_audio(audio_file)
@@ -118,17 +120,30 @@ def process_audio(audio_file, min_silence_ms, min_speech_ms, pad_ms):
                 mic_audio=handle_file(seg_path),
                 api_name="/run"
             )
             full_asr_text.append(asr_text)
-            result_text += (
-                f"🎵 مقطع {i+1} "
-                f"({intervals[i][0]:.2f}s → {intervals[i][1]:.2f}s)\n"
-                f"📜 {asr_text}\n\n"
-            )
-        result_text += "\n🧾 النص الكامل:\n"
-        result_text += " ".join(full_asr_text)
         # ZIP
         zip_path = os.path.join(temp_dir, "segments.zip")
@@ -136,20 +151,21 @@ def process_audio(audio_file, min_silence_ms, min_speech_ms, pad_ms):
             for f in segment_files:
                 zipf.write(f, os.path.basename(f))
-        return plot_img, result_text, zip_path, segment_files
     except Exception as e:
-        return None, f"❌ خطأ: {str(e)}", None, []
 # ======================
 # Gradio UI
 # ======================
-with gr.Blocks(title="Quran Segmentation + ASR") as demo:
-    gr.Markdown("## 🕌 تقطيع التلاوات + التعرف على النص القرآني (ASR)")
     with gr.Row():
         with gr.Column():
             audio_input = gr.Audio(type="filepath", label="📤 ارفع التلاوة")
             min_silence = gr.Slider(10, 500, 30, step=10, label="Min Silence (ms)")
             min_speech = gr.Slider(10, 500, 30, step=10, label="Min Speech (ms)")
             padding = gr.Slider(0, 200, 30, step=10, label="Padding (ms)")
@@ -157,26 +173,14 @@ with gr.Blocks(title="Quran Segmentation + ASR") as demo:
         with gr.Column():
             plot_out = gr.Image(label="📈 الإشارة")
-            text_out = gr.Textbox(lines=20, label="📜 النص")
     zip_out = gr.File(label="📦 تحميل المقاطع")
-    segment_outputs = [gr.Audio(visible=False) for _ in range(50)]
-    def process_and_show(audio, ms, sp, pad):
-        plot, text, zipf, segments = process_audio(audio, ms, sp, pad)
-        outputs = [plot, text, zipf]
-        for i in range(50):
-            if i < len(segments):
-                outputs.append(gr.Audio(value=segments[i], visible=True))
-            else:
-                outputs.append(gr.Audio(visible=False))
-        return outputs
     btn.click(
-        process_and_show,
-        inputs=[audio_input, min_silence, min_speech, padding],
-        outputs=[plot_out, text_out, zip_out] + segment_outputs
     )
 if __name__ == "__main__":

 import torch
 import soundfile as sf
 import librosa
 from transformers import AutoFeatureExtractor, AutoModelForAudioFrameClassification
 from recitations_segmenter import segment_recitations, clean_speech_intervals
 import io
 # 🔹 ASR client
 from gradio_client import Client, handle_file
+# 🔹 Arabic Aligner
+from arabic_aligner import ArabicAligner  # the file containing the ArabicAligner code shared earlier
 # ======================
 # Setup device and model
 # ======================
     return x[start:end]
 def plot_signal(x, intervals, sr=16000):
+    import matplotlib.pyplot as plt
     fig, ax = plt.subplots(figsize=(20, 4))
     if isinstance(x, torch.Tensor):
         x = x.numpy()
         ax.axvline(x=s * sr, color='red', alpha=0.4)
         ax.axvline(x=e * sr, color='red', alpha=0.4)
     plt.tight_layout()
     buf = io.BytesIO()
     plt.savefig(buf, format="png")
     buf.seek(0)
 # ======================
 # Main processing
 # ======================
+def process_audio_and_compare(audio_file, reference_text, min_silence_ms, min_speech_ms, pad_ms):
     if audio_file is None:
+        return None, "⚠️ ارفع ملف صوتي أولاً", None
     try:
         wav = read_audio(audio_file)
                 mic_audio=handle_file(seg_path),
                 api_name="/run"
             )
             full_asr_text.append(asr_text)
+            result_text += f"🎵 مقطع {i+1} ({intervals[i][0]:.2f}s → {intervals[i][1]:.2f}s)\n📜 {asr_text}\n\n"
+        full_asr_text_str = " ".join(full_asr_text)
+        result_text += f"\n🧾 النص الكامل:\n{full_asr_text_str}\n\n"
+        # 🔹 ArabicAligner comparison
+        aligner = ArabicAligner()
+        align_results = aligner.align_and_compare(full_asr_text_str, reference_text)
+        stats = align_results['statistics']
+        result_text += (
+            f"📊 إحصائيات المقارنة:\n"
+            f"- إجمالي كلمات المرجع: {stats['total_reference_words']}\n"
+            f"- إجمالي كلمات ASR: {stats['total_user_words']}\n"
+            f"- إجمالي الأخطاء: {stats['total_errors']}\n"
+            f"  - أخطاء الكلمات: {stats['word_level_errors']}\n"
+            f"  - أخطاء الحركات: {stats['diacritic_errors']}\n"
+            f"- الدقة: {stats['accuracy']:.2f}%\n\n"
+            f"✏️ تفاصيل الأخطاء:\n"
+        )
+        for i, error in enumerate(align_results['errors'], 1):
+            result_text += f"[{i}] Type: {error.error_type.value.upper()} | User: '{error.user_word}' | Expected: '{error.reference_word}' | Details: {error.details}\n"
         # ZIP
         zip_path = os.path.join(temp_dir, "segments.zip")
             for f in segment_files:
                 zipf.write(f, os.path.basename(f))
+        return plot_img, result_text, zip_path
     except Exception as e:
+        return None, f"❌ خطأ: {str(e)}", None
 # ======================
 # Gradio UI
 # ======================
+with gr.Blocks(title="Quran Segmentation + ASR + Comparison") as demo:
+    gr.Markdown("## 🕌 تقطيع التلاوات + التعرف على النص القرآني + المقارنة بالنص المشكول")
     with gr.Row():
         with gr.Column():
             audio_input = gr.Audio(type="filepath", label="📤 ارفع التلاوة")
+            reference_text_input = gr.Textbox(label="📖 أدخل نص القرآن المشكول للمقارنة", lines=10)
             min_silence = gr.Slider(10, 500, 30, step=10, label="Min Silence (ms)")
             min_speech = gr.Slider(10, 500, 30, step=10, label="Min Speech (ms)")
             padding = gr.Slider(0, 200, 30, step=10, label="Padding (ms)")
         with gr.Column():
             plot_out = gr.Image(label="📈 الإشارة")
+            text_out = gr.Textbox(lines=30, label="📜 النتائج")
     zip_out = gr.File(label="📦 تحميل المقاطع")
     btn.click(
+        fn=process_audio_and_compare,
+        inputs=[audio_input, reference_text_input, min_silence, min_speech, padding],
+        outputs=[plot_out, text_out, zip_out]
     )
 if __name__ == "__main__":