aboalaa147 committed on
Commit
cb16cfc
·
verified ·
1 Parent(s): b0b0b0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -33
app.py CHANGED
@@ -3,7 +3,6 @@ import numpy as np
3
  import torch
4
  import soundfile as sf
5
  import librosa
6
- from matplotlib import pyplot as plt
7
  from transformers import AutoFeatureExtractor, AutoModelForAudioFrameClassification
8
  from recitations_segmenter import segment_recitations, clean_speech_intervals
9
  import io
@@ -15,6 +14,9 @@ import zipfile
15
  # 🔹 ASR client
16
  from gradio_client import Client, handle_file
17
 
 
 
 
18
  # ======================
19
  # Setup device and model
20
  # ======================
@@ -50,6 +52,7 @@ def get_interval(x, intervals, idx, sr=16000):
50
  return x[start:end]
51
 
52
  def plot_signal(x, intervals, sr=16000):
 
53
  fig, ax = plt.subplots(figsize=(20, 4))
54
  if isinstance(x, torch.Tensor):
55
  x = x.numpy()
@@ -58,7 +61,6 @@ def plot_signal(x, intervals, sr=16000):
58
  ax.axvline(x=s * sr, color='red', alpha=0.4)
59
  ax.axvline(x=e * sr, color='red', alpha=0.4)
60
  plt.tight_layout()
61
-
62
  buf = io.BytesIO()
63
  plt.savefig(buf, format="png")
64
  buf.seek(0)
@@ -69,9 +71,9 @@ def plot_signal(x, intervals, sr=16000):
69
  # ======================
70
  # Main processing
71
  # ======================
72
- def process_audio(audio_file, min_silence_ms, min_speech_ms, pad_ms):
73
  if audio_file is None:
74
- return None, "⚠️ ارفع ملف صوتي", None, []
75
 
76
  try:
77
  wav = read_audio(audio_file)
@@ -118,17 +120,30 @@ def process_audio(audio_file, min_silence_ms, min_speech_ms, pad_ms):
118
  mic_audio=handle_file(seg_path),
119
  api_name="/run"
120
  )
121
-
122
  full_asr_text.append(asr_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
- result_text += (
125
- f"🎵 مقطع {i+1} "
126
- f"({intervals[i][0]:.2f}s → {intervals[i][1]:.2f}s)\n"
127
- f"📜 {asr_text}\n\n"
128
- )
129
-
130
- result_text += "\n🧾 النص الكامل:\n"
131
- result_text += " ".join(full_asr_text)
132
 
133
  # ZIP
134
  zip_path = os.path.join(temp_dir, "segments.zip")
@@ -136,20 +151,21 @@ def process_audio(audio_file, min_silence_ms, min_speech_ms, pad_ms):
136
  for f in segment_files:
137
  zipf.write(f, os.path.basename(f))
138
 
139
- return plot_img, result_text, zip_path, segment_files
140
 
141
  except Exception as e:
142
- return None, f"❌ خطأ: {str(e)}", None, []
143
 
144
  # ======================
145
  # Gradio UI
146
  # ======================
147
- with gr.Blocks(title="Quran Segmentation + ASR") as demo:
148
- gr.Markdown("## 🕌 تقطيع التلاوات + التعرف على النص القرآني (ASR)")
149
 
150
  with gr.Row():
151
  with gr.Column():
152
  audio_input = gr.Audio(type="filepath", label="📤 ارفع التلاوة")
 
153
  min_silence = gr.Slider(10, 500, 30, step=10, label="Min Silence (ms)")
154
  min_speech = gr.Slider(10, 500, 30, step=10, label="Min Speech (ms)")
155
  padding = gr.Slider(0, 200, 30, step=10, label="Padding (ms)")
@@ -157,26 +173,14 @@ with gr.Blocks(title="Quran Segmentation + ASR") as demo:
157
 
158
  with gr.Column():
159
  plot_out = gr.Image(label="📈 الإشارة")
160
- text_out = gr.Textbox(lines=20, label="📜 النص")
161
 
162
  zip_out = gr.File(label="📦 تحميل المقاطع")
163
 
164
- segment_outputs = [gr.Audio(visible=False) for _ in range(50)]
165
-
166
- def process_and_show(audio, ms, sp, pad):
167
- plot, text, zipf, segments = process_audio(audio, ms, sp, pad)
168
- outputs = [plot, text, zipf]
169
- for i in range(50):
170
- if i < len(segments):
171
- outputs.append(gr.Audio(value=segments[i], visible=True))
172
- else:
173
- outputs.append(gr.Audio(visible=False))
174
- return outputs
175
-
176
  btn.click(
177
- process_and_show,
178
- inputs=[audio_input, min_silence, min_speech, padding],
179
- outputs=[plot_out, text_out, zip_out] + segment_outputs
180
  )
181
 
182
  if __name__ == "__main__":
 
3
  import torch
4
  import soundfile as sf
5
  import librosa
 
6
  from transformers import AutoFeatureExtractor, AutoModelForAudioFrameClassification
7
  from recitations_segmenter import segment_recitations, clean_speech_intervals
8
  import io
 
14
  # 🔹 ASR client
15
  from gradio_client import Client, handle_file
16
 
17
+ # 🔹 Arabic Aligner
18
+ from arabic_aligner import ArabicAligner # الملف اللي فيه الكود اللي بعتته قبل كده
19
+
20
  # ======================
21
  # Setup device and model
22
  # ======================
 
52
  return x[start:end]
53
 
54
  def plot_signal(x, intervals, sr=16000):
55
+ import matplotlib.pyplot as plt
56
  fig, ax = plt.subplots(figsize=(20, 4))
57
  if isinstance(x, torch.Tensor):
58
  x = x.numpy()
 
61
  ax.axvline(x=s * sr, color='red', alpha=0.4)
62
  ax.axvline(x=e * sr, color='red', alpha=0.4)
63
  plt.tight_layout()
 
64
  buf = io.BytesIO()
65
  plt.savefig(buf, format="png")
66
  buf.seek(0)
 
71
  # ======================
72
  # Main processing
73
  # ======================
74
+ def process_audio_and_compare(audio_file, reference_text, min_silence_ms, min_speech_ms, pad_ms):
75
  if audio_file is None:
76
+ return None, "⚠️ ارفع ملف صوتي أولاً", None
77
 
78
  try:
79
  wav = read_audio(audio_file)
 
120
  mic_audio=handle_file(seg_path),
121
  api_name="/run"
122
  )
 
123
  full_asr_text.append(asr_text)
124
+ result_text += f"🎵 مقطع {i+1} ({intervals[i][0]:.2f}s → {intervals[i][1]:.2f}s)\n📜 {asr_text}\n\n"
125
+
126
+ full_asr_text_str = " ".join(full_asr_text)
127
+ result_text += f"\n🧾 النص الكامل:\n{full_asr_text_str}\n\n"
128
+
129
+ # 🔹 ArabicAligner comparison
130
+ aligner = ArabicAligner()
131
+ align_results = aligner.align_and_compare(full_asr_text_str, reference_text)
132
+
133
+ stats = align_results['statistics']
134
+ result_text += (
135
+ f"📊 إحصائيات المقارنة:\n"
136
+ f"- إجمالي كلمات المرجع: {stats['total_reference_words']}\n"
137
+ f"- إجمالي كلمات ASR: {stats['total_user_words']}\n"
138
+ f"- إجمالي الأخطاء: {stats['total_errors']}\n"
139
+ f" - أخطاء الكلمات: {stats['word_level_errors']}\n"
140
+ f" - أخطاء الحركات: {stats['diacritic_errors']}\n"
141
+ f"- الدقة: {stats['accuracy']:.2f}%\n\n"
142
+ f"✏️ تفاصيل الأخطاء:\n"
143
+ )
144
 
145
+ for i, error in enumerate(align_results['errors'], 1):
146
+ result_text += f"[{i}] Type: {error.error_type.value.upper()} | User: '{error.user_word}' | Expected: '{error.reference_word}' | Details: {error.details}\n"
 
 
 
 
 
 
147
 
148
  # ZIP
149
  zip_path = os.path.join(temp_dir, "segments.zip")
 
151
  for f in segment_files:
152
  zipf.write(f, os.path.basename(f))
153
 
154
+ return plot_img, result_text, zip_path
155
 
156
  except Exception as e:
157
+ return None, f"❌ خطأ: {str(e)}", None
158
 
159
  # ======================
160
  # Gradio UI
161
  # ======================
162
+ with gr.Blocks(title="Quran Segmentation + ASR + Comparison") as demo:
163
+ gr.Markdown("## 🕌 تقطيع التلاوات + التعرف على النص القرآني + المقارنة بالنص المشكول")
164
 
165
  with gr.Row():
166
  with gr.Column():
167
  audio_input = gr.Audio(type="filepath", label="📤 ارفع التلاوة")
168
+ reference_text_input = gr.Textbox(label="📖 أدخل نص القرآن المشكول للمقارنة", lines=10)
169
  min_silence = gr.Slider(10, 500, 30, step=10, label="Min Silence (ms)")
170
  min_speech = gr.Slider(10, 500, 30, step=10, label="Min Speech (ms)")
171
  padding = gr.Slider(0, 200, 30, step=10, label="Padding (ms)")
 
173
 
174
  with gr.Column():
175
  plot_out = gr.Image(label="📈 الإشارة")
176
+ text_out = gr.Textbox(lines=30, label="📜 النتائج")
177
 
178
  zip_out = gr.File(label="📦 تحميل المقاطع")
179
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  btn.click(
181
+ fn=process_audio_and_compare,
182
+ inputs=[audio_input, reference_text_input, min_silence, min_speech, padding],
183
+ outputs=[plot_out, text_out, zip_out]
184
  )
185
 
186
  if __name__ == "__main__":