Alaa16 committed on
Commit
04b4868
·
verified ·
1 Parent(s): 460739c

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +312 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import faiss
4
+ import torch
5
+ import gradio as gr
6
+ from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
7
+ from sentence_transformers import SentenceTransformer
8
+ import librosa
9
+
10
# Single place that selects the inference device for every pipeline below.
device = "cpu"

# --------------- Load Models ---------------
# Speech-to-text pipeline (Whisper small); audio is processed in 30 s chunks.
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    chunk_length_s=30,
    device=device,
)
# Decoder prompt ids requesting Arabic output with the "transcribe" task.
forced_decoder_ids = asr_pipeline.tokenizer.get_decoder_prompt_ids(
    language="arabic", task="transcribe"
)

# Seq2seq summarization model and its tokenizer.
summ_model_name = "csebuetnlp/mT5_multilingual_XLSum"
summ_tokenizer = AutoTokenizer.from_pretrained(summ_model_name)
summ_model = AutoModelForSeq2SeqLM.from_pretrained(summ_model_name)

# Sentence-embedding model used for semantic search over transcript segments.
embedding_model = SentenceTransformer("intfloat/multilingual-e5-base")
embedding_dim = embedding_model.get_sentence_embedding_dimension()

# Audio emotion classifier; uses the same device setting as the ASR pipeline
# so that changing `device` above moves both pipelines together.
emotion_classifier = pipeline(
    "audio-classification",
    model="Dpngtm/wav2vec2-emotion-recognition",
    device=device,
)

# --------------- FAISS Index ---------------
# Inner-product index over segment embeddings; rebuilt for every processed file.
index = faiss.IndexFlatIP(embedding_dim)
# Transcript chunks, stored in the same order as the vectors in `index`.
text_segments = []
39
+
40
# Arabic domain terms searched for in transcripts, mapped to English labels.
KEYWORDS = dict(
    [
        ("ذكاء اصطناعي", "AI"),
        ("تعلم عميق", "Deep Learning"),
        ("شبكة عصبية", "Neural Network"),
        ("تعلم آلي", "Machine Learning"),
        ("معالجة اللغات", "NLP"),
        ("رؤية حاسوبية", "Computer Vision"),
        ("بيانات", "Data"),
        ("نموذج", "Model"),
        ("تدريب", "Training"),
        ("خوارزمية", "Algorithm"),
        ("تصنيف", "Classification"),
        ("استرجاع", "Retrieval"),
        ("تحليل", "Analysis"),
        ("محاضرة", "Lecture"),
        ("جامعة", "University"),
        ("بحث", "Research"),
        ("مشروع", "Project"),
    ]
)
50
+
51
# Emoji shown next to each (lower-cased) emotion label.
EMOTION_ICONS = {
    "happy": "😊",
    "sad": "😢",
    "angry": "😡",
    "neutral": "😐",
    "calm": "😌",
    "fearful": "😨",
    "disgust": "🤢",
    "surprised": "😲",
}
55
+
56
+
57
+ # --------------- Pipeline Functions ---------------
58
def encode_texts(texts, prefix="passage: "):
    """Embed *texts* and return the vectors as a float32 NumPy array.

    ``prefix`` is prepended to every text before encoding (callers in this
    file use ``"passage: "`` for segments and ``"query: "`` for searches).
    Embeddings are requested already normalized from the model.
    """
    vectors = embedding_model.encode(
        [prefix + item for item in texts],
        normalize_embeddings=True,
    )
    return np.array(vectors, dtype="float32")
62
+
63
+
64
def transcribe_audio(audio_path):
    """Transcribe an audio file to Arabic text with timestamped chunks.

    Args:
        audio_path: Path of the audio file handed to the ASR pipeline.

    Returns:
        A ``(full_text, chunks)`` tuple. ``chunks`` is a list of dicts with
        ``"text"`` and ``"timestamp"`` keys; when the pipeline reports no
        chunks, a single chunk covering the whole text with a ``(0.0, 0.0)``
        timestamp is returned instead so downstream indexing always has input.
    """
    result = asr_pipeline(
        audio_path,
        return_timestamps=True,
        # Select language/task through generate() keyword arguments instead
        # of the deprecated `forced_decoder_ids` prompt override.
        generate_kwargs={"language": "arabic", "task": "transcribe"},
    )
    full_text = result["text"]
    chunks = result.get("chunks") or [
        {"text": full_text, "timestamp": (0.0, 0.0)}
    ]
    return full_text, chunks
75
+
76
+
77
def summarize_text(text, max_input=512, max_output=150):
    """Return an abstractive summary of *text*.

    Args:
        text: Text to summarize. Input beyond ``max_input`` tokens is
            truncated, so only the beginning of a long transcript is used.
        max_input: Maximum number of input tokens kept by the tokenizer.
        max_output: Maximum length of the generated summary, in tokens.

    Returns:
        The decoded summary, or an empty string when *text* is empty,
        ``None`` or whitespace only (nothing to summarize).
    """
    cleaned = text.strip() if text else ""
    if not cleaned:
        return ""

    # A single sequence needs no padding; padding to `max_input` would only
    # add masked tokens for the encoder to process.
    inputs = summ_tokenizer(
        [cleaned],
        max_length=max_input,
        truncation=True,
        return_tensors="pt",
    )
    summary_ids = summ_model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        num_beams=2,
        max_length=max_output,
        early_stopping=True,
        no_repeat_ngram_size=3,
    )
    return summ_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
94
+
95
+
96
def detect_emotion(audio_path):
    """Classify the emotion of an audio file.

    Only the first 15 seconds are loaded, resampled to 16 kHz, and passed to
    the emotion classifier. Returns the ``(label, score)`` of the
    highest-scoring prediction.
    """
    waveform, sample_rate = librosa.load(audio_path, sr=16000, duration=15.0)
    candidates = emotion_classifier(
        {"array": waveform, "sampling_rate": sample_rate}
    )
    best = max(candidates, key=lambda candidate: candidate["score"])
    return best["label"], best["score"]
101
+
102
+
103
def detect_keywords(text, keywords=None):
    """Count occurrences of known keywords in *text*.

    Args:
        text: Text to scan. Empty or ``None`` yields an empty result.
        keywords: Optional mapping of search term -> English label. Defaults
            to the module-level ``KEYWORDS`` table.

    Returns:
        A list of ``{"keyword_ar", "keyword_en", "count"}`` dicts for every
        term that occurs at least once, ordered by descending count (terms
        with equal counts keep the mapping's order).
    """
    vocabulary = KEYWORDS if keywords is None else keywords
    if not text:
        return []

    found = []
    for term, label in vocabulary.items():
        occurrences = text.count(term)
        if occurrences > 0:
            found.append(
                {"keyword_ar": term, "keyword_en": label, "count": occurrences}
            )
    # list.sort is stable, so ties stay in vocabulary order.
    found.sort(key=lambda entry: entry["count"], reverse=True)
    return found
111
+
112
+
113
def index_segments(chunks):
    """Rebuild the global search index from transcript *chunks*.

    Args:
        chunks: Sequence of dicts that each carry a ``"text"`` entry.

    Returns:
        The number of indexed segments.

    The new index is built completely before the module-level ``index`` and
    ``text_segments`` are replaced, so a failure while embedding leaves the
    previous index and its segments untouched and in sync. An empty *chunks*
    sequence results in an empty index instead of an invalid ``add`` call.
    """
    global index, text_segments
    segments = list(chunks)
    fresh_index = faiss.IndexFlatIP(embedding_dim)
    if segments:
        embeddings = encode_texts(
            [segment["text"] for segment in segments], prefix="passage: "
        )
        fresh_index.add(embeddings)
    # Publish both globals together, only after everything succeeded.
    index, text_segments = fresh_index, segments
    return len(segments)
121
+
122
+
123
def _format_clock(seconds):
    """Render *seconds* as ``M:SS``; a missing (``None``) value counts as 0."""
    whole = int(seconds or 0.0)
    return f"{whole // 60}:{whole % 60:02d}"


def search_query(query, top_k=3):
    """Search the indexed transcript segments for *query*.

    Args:
        query: Search text; it is embedded with the ``"query: "`` prefix.
        top_k: Maximum number of results to return.

    Returns:
        A Markdown string with one entry per hit (rank, score as a
        percentage, time range and segment text), or a user-facing message
        when nothing is indexed or nothing matched.
    """
    # Snapshot the globals so one call works on a consistent pair.
    current_index, segments = index, text_segments
    if current_index.ntotal == 0:
        return "لم يتم تحميل أي ملف صوتي بعد. قم برفع ملف أولاً."
    if top_k < 1:
        return "لا توجد نتائج"

    query_emb = encode_texts([query], prefix="query: ")
    scores, indices = current_index.search(
        query_emb, k=min(top_k, current_index.ntotal)
    )
    results = []
    for rank, (i, score) in enumerate(zip(indices[0], scores[0]), 1):
        # FAISS reports missing neighbours as -1; a negative value must not
        # be used as a Python index (it would pick the last segment).
        if not 0 <= i < len(segments):
            continue
        seg = segments[i]
        start, end = seg.get("timestamp") or (None, None)
        time_str = f"{_format_clock(start)} - {_format_clock(end)}"
        results.append(
            f"**#{rank}** | تطابق: {score * 100:.1f}% | ⏱️ {time_str}\n> {seg['text']}"
        )
    return "\n\n".join(results) if results else "لا توجد نتائج"
141
+
142
+
143
+ # --------------- Main Process ---------------
144
# NOTE: `progress=gr.Progress()` as a default argument is how the handler
# receives Gradio's progress tracker; it is intentional.
def process_audio(audio_path, progress=gr.Progress()):
    """Run the full analysis pipeline on an uploaded audio file.

    Steps, in order: emotion detection, transcription, summarization,
    segment indexing for semantic search, and keyword extraction. Progress
    is reported through *progress* after each stage.

    Args:
        audio_path: Path of the audio file, or ``None`` when nothing was
            provided.
        progress: Gradio progress tracker.

    Returns:
        A tuple ``(emotion_result, full_text, summary, kw_text, seg_info)``
        of display strings, matching the order of the UI output components.

    Raises:
        gr.Error: If *audio_path* is ``None``.
    """
    if audio_path is None:
        # The message previously contained a corrupted character.
        raise gr.Error("يرجى رفع ملف صوتي أولاً")

    progress(0.05, desc="تحليل المشاعر...")
    emotion_label, emotion_conf = detect_emotion(audio_path)
    # Fall back to a generic icon for labels missing from EMOTION_ICONS.
    icon = EMOTION_ICONS.get(emotion_label.lower(), "🎵")
    emotion_result = f"{icon} {emotion_label} ({emotion_conf * 100:.1f}%)"

    progress(0.25, desc="تحويل الصوت إلى نص...")
    full_text, chunks = transcribe_audio(audio_path)

    progress(0.60, desc="إنشاء الملخص...")
    summary = summarize_text(full_text)

    progress(0.80, desc="فهرسة المقاطع...")
    n_segments = index_segments(chunks)

    progress(0.90, desc="استخراج الكلمات المفتاحية...")
    keywords = detect_keywords(full_text)
    kw_text = " ".join(
        f"🔑 {k['keyword_ar']} ({k['keyword_en']}) ×{k['count']}" for k in keywords
    )
    if not kw_text:
        kw_text = "لم يتم العثور على كلمات مفتاحية"

    seg_info = f"✅ تم فهرسة {n_segments} مقطع للبحث الدلالي"

    progress(1.0, desc="تم!")
    return emotion_result, full_text, summary, kw_text, seg_info
174
+
175
+
176
def do_search(query):
    """UI handler for the search box.

    Returns a prompt message when *query* is empty or whitespace only;
    otherwise returns the top-5 results for the stripped query.
    """
    cleaned = query.strip() if query else ""
    if cleaned == "":
        return "يرجى إدخال استعلام للبحث"
    return search_query(cleaned, top_k=5)
180
+
181
+
182
+ # --------------- Gradio UI ---------------
183
# Stylesheet passed to gr.Blocks: caps the page width, and styles the
# gradient title (.main-title) and the subtitle (.sub-title) used in the
# page header HTML.
CUSTOM_CSS = """
.gradio-container {
max-width: 1200px !important;
font-family: 'Inter', sans-serif !important;
}
.main-title {
text-align: center;
background: linear-gradient(135deg, #49f4c8, #7c3aed);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-size: 2.5rem;
font-weight: 800;
margin-bottom: 0.5rem;
}
.sub-title {
text-align: center;
color: #a0abc2;
font-size: 1.1rem;
margin-bottom: 2rem;
}
"""
204
+
205
with gr.Blocks(
    theme=gr.themes.Base(
        primary_hue=gr.themes.colors.emerald,
        secondary_hue=gr.themes.colors.purple,
        neutral_hue=gr.themes.colors.slate,
        font=gr.themes.GoogleFont("Inter"),
    ),
    css=CUSTOM_CSS,
    title="ArabEdu",
) as demo:

    # Page header: title and Arabic subtitle, styled by CUSTOM_CSS.
    gr.HTML(
        """
        <div class="main-title">ArabEdu</div>
        <div class="sub-title">
        نظام فهم المحاضرات العربية — حوّل محاضراتك الصوتية إلى نصوص ذكية وملخصات دقيقة
        </div>
        """
    )

    # Audio source: uploaded file or microphone recording, passed as a path.
    with gr.Row():
        audio_input = gr.Audio(
            label="📁 رفع الملف الصوتي",
            type="filepath",
            sources=["upload", "microphone"],
        )

    process_btn = gr.Button(
        "🚀 معالجة الملف الصوتي",
        variant="primary",
        size="lg",
    )

    # Read-only result widgets filled by process_audio.
    with gr.Row():
        emotion_output = gr.Textbox(
            label="🎭 تحليل المشاعر الصوتية",
            interactive=False,
            scale=1,
        )

    with gr.Row():
        with gr.Column(scale=2):
            transcript_output = gr.Textbox(
                label="📝 النص الكامل",
                interactive=False,
                lines=10,
                rtl=True,
            )
        with gr.Column(scale=1):
            summary_output = gr.Textbox(
                label="📋 الملخص",
                interactive=False,
                lines=6,
                rtl=True,
            )
            keywords_output = gr.Textbox(
                label="🔑 الكلمات المفتاحية",
                interactive=False,
                lines=3,
                rtl=True,
            )

    seg_info_output = gr.Textbox(
        label="فهرسة",
        interactive=False,
        visible=True,
    )

    gr.Markdown("---")
    gr.Markdown("### 🔍 البحث الدلالي في المحتوى")

    # Semantic search over the segments indexed by the last processed file.
    with gr.Row():
        search_input = gr.Textbox(
            label="ابحث عن موضوع معين في التسجيل",
            placeholder="مثال: ما هو الذكاء الاصطناعي؟",
            scale=4,
            rtl=True,
        )
        search_btn = gr.Button("🔍 بحث", variant="secondary", scale=1)

    search_output = gr.Markdown(label="نتائج البحث", rtl=True)

    # Output order must match the tuple returned by process_audio.
    process_btn.click(
        fn=process_audio,
        inputs=[audio_input],
        outputs=[
            emotion_output,
            transcript_output,
            summary_output,
            keywords_output,
            seg_info_output,
        ],
    )

    # The button click and pressing Enter in the box run the same search.
    _search_event = dict(
        fn=do_search,
        inputs=[search_input],
        outputs=[search_output],
    )
    search_btn.click(**_search_event)
    search_input.submit(**_search_event)
310
+
311
# Enable request queuing, then start the web server.
demo.queue().launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ accelerate
4
+ faiss-cpu
5
+ sentencepiece
6
+ sentence-transformers
7
+ librosa
8
+ gradio>=4.0