Spaces:

Alaa16
/

ArabEdu

Sleeping

App Files Files Community

Alaa16 committed 14 days ago

Commit

04b4868

verified ·

1 Parent(s): 460739c

Upload 2 files

Browse files

Files changed (2) hide show

app.py +312 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,312 @@

+import os
+import numpy as np
+import faiss
+import torch
+import gradio as gr
+from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
+from sentence_transformers import SentenceTransformer
+import librosa
+device = "cpu"
+# --------------- Load Models ---------------
+asr_pipeline = pipeline(
+    "automatic-speech-recognition",
+    model="openai/whisper-small",
+    chunk_length_s=30,
+    device=device,
+)
+forced_decoder_ids = asr_pipeline.tokenizer.get_decoder_prompt_ids(
+    language="arabic", task="transcribe"
+)
+summ_model_name = "csebuetnlp/mT5_multilingual_XLSum"
+summ_tokenizer = AutoTokenizer.from_pretrained(summ_model_name)
+summ_model = AutoModelForSeq2SeqLM.from_pretrained(summ_model_name)
+embedding_model = SentenceTransformer("intfloat/multilingual-e5-base")
+embedding_dim = embedding_model.get_sentence_embedding_dimension()
+emotion_classifier = pipeline(
+    "audio-classification",
+    model="Dpngtm/wav2vec2-emotion-recognition",
+    device=-1,
+)
+# --------------- FAISS Index ---------------
+# Inner-product index over sentence embeddings. encode_texts() L2-normalizes
+# its vectors, so the inner product here equals cosine similarity.
+index = faiss.IndexFlatIP(embedding_dim)
+# Transcript chunks in the same order as the vectors in `index`; row i of the
+# index corresponds to text_segments[i]. Replaced by index_segments().
+text_segments = []
+# Arabic domain terms mapped to the English label shown next to them.
+# detect_keywords() counts plain substring occurrences of each Arabic key.
+KEYWORDS = {
+    "ذكاء اصطناعي": "AI", "تعلم عميق": "Deep Learning",
+    "شبكة عصبية": "Neural Network", "تعلم آلي": "Machine Learning",
+    "معالجة اللغات": "NLP", "رؤية حاسوبية": "Computer Vision",
+    "بيانات": "Data", "نموذج": "Model", "تدريب": "Training",
+    "خوارزمية": "Algorithm", "تصنيف": "Classification",
+    "استرجاع": "Retrieval", "تحليل": "Analysis",
+    "محاضرة": "Lecture", "جامعة": "University",
+    "بحث": "Research", "مشروع": "Project",
+}
+# Emoji shown for each emotion label; process_audio() looks labels up in
+# lowercase and falls back to a generic icon for labels not listed here.
+EMOTION_ICONS = {
+    "happy": "😊", "sad": "😢", "angry": "😡", "neutral": "😐",
+    "calm": "😌", "fearful": "😨", "disgust": "🤢", "surprised": "😲",
+}
+# --------------- Pipeline Functions ---------------
+def encode_texts(texts, prefix="passage: "):
+    prefixed = [prefix + t for t in texts]
+    embeddings = embedding_model.encode(prefixed, normalize_embeddings=True)
+    return np.array(embeddings).astype("float32")
+def transcribe_audio(audio_path):
+    result = asr_pipeline(
+        audio_path,
+        return_timestamps=True,
+        generate_kwargs={"forced_decoder_ids": forced_decoder_ids},
+    )
+    full_text = result["text"]
+    chunks = result.get("chunks", [])
+    if not chunks:
+        chunks = [{"text": full_text, "timestamp": (0.0, 0.0)}]
+    return full_text, chunks
+def summarize_text(text, max_input=512, max_output=150):
+    inputs = summ_tokenizer(
+        [text.strip()],
+        max_length=max_input,
+        truncation=True,
+        padding="max_length",
+        return_tensors="pt",
+    )
+    summary_ids = summ_model.generate(
+        inputs["input_ids"],
+        attention_mask=inputs["attention_mask"],
+        num_beams=2,
+        max_length=max_output,
+        early_stopping=True,
+        no_repeat_ngram_size=3,
+    )
+    return summ_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+def detect_emotion(audio_path):
+    audio, sr = librosa.load(audio_path, sr=16000, duration=15.0)
+    predictions = emotion_classifier({"array": audio, "sampling_rate": sr})
+    top = max(predictions, key=lambda x: x["score"])
+    return top["label"], top["score"]
+def detect_keywords(text):
+    found = []
+    for ar, en in KEYWORDS.items():
+        count = text.count(ar)
+        if count > 0:
+            found.append({"keyword_ar": ar, "keyword_en": en, "count": count})
+    found.sort(key=lambda x: x["count"], reverse=True)
+    return found
+def index_segments(chunks):
+    global index, text_segments
+    index = faiss.IndexFlatIP(embedding_dim)
+    text_segments = chunks
+    segment_texts = [c["text"] for c in chunks]
+    embeddings = encode_texts(segment_texts, prefix="passage: ")
+    index.add(embeddings)
+    return len(chunks)
+def search_query(query, top_k=3):
+    if index.ntotal == 0:
+        return "لم يتم تحميل أي ملف صوتي بعد. قم برفع ملف أولاً."
+    query_emb = encode_texts([query], prefix="query: ")
+    scores, indices = index.search(query_emb, k=min(top_k, index.ntotal))
+    results = []
+    for rank, (i, score) in enumerate(zip(indices[0], scores[0]), 1):
+        if i < len(text_segments):
+            seg = text_segments[i]
+            start = seg["timestamp"][0] or 0.0
+            end = seg["timestamp"][1] or 0.0
+            sm, ss = int(start // 60), int(start % 60)
+            em, es = int(end // 60), int(end % 60)
+            time_str = f"{sm}:{ss:02d} - {em}:{es:02d}"
+            results.append(
+                f"**#{rank}** | تطابق: {score * 100:.1f}% | ⏱️ {time_str}\n> {seg['text']}"
+            )
+    return "\n\n".join(results) if results else "لا توجد نتائج"
+# --------------- Main Process ---------------
+def process_audio(audio_path, progress=gr.Progress()):
+    if audio_path is None:
+        raise gr.Error("يرجى ��فع ملف صوتي أولاً")
+    progress(0.05, desc="تحليل المشاعر...")
+    emotion_label, emotion_conf = detect_emotion(audio_path)
+    icon = EMOTION_ICONS.get(emotion_label.lower(), "🎵")
+    emotion_result = f"{icon} {emotion_label} ({emotion_conf * 100:.1f}%)"
+    progress(0.25, desc="تحويل الصوت إلى نص...")
+    full_text, chunks = transcribe_audio(audio_path)
+    progress(0.60, desc="إنشاء الملخص...")
+    summary = summarize_text(full_text)
+    progress(0.80, desc="فهرسة المقاطع...")
+    n_segments = index_segments(chunks)
+    progress(0.90, desc="استخراج الكلمات المفتاحية...")
+    keywords = detect_keywords(full_text)
+    kw_text = "  ".join(
+        [f"🔑 {k['keyword_ar']} ({k['keyword_en']}) ×{k['count']}" for k in keywords]
+    )
+    if not kw_text:
+        kw_text = "لم يتم العثور على كلمات مفتاحية"
+    seg_info = f"✅ تم فهرسة {n_segments} مقطع للبحث الدلالي"
+    progress(1.0, desc="تم!")
+    return emotion_result, full_text, summary, kw_text, seg_info
+def do_search(query):
+    if not query or not query.strip():
+        return "يرجى إدخال استعلام للبحث"
+    return search_query(query.strip(), top_k=5)
+# --------------- Gradio UI ---------------
+# Extra CSS passed to gr.Blocks: limits the page width and styles the
+# "main-title" / "sub-title" classes used by the HTML header.
+CUSTOM_CSS = """
+.gradio-container {
+    max-width: 1200px !important;
+    font-family: 'Inter', sans-serif !important;
+}
+.main-title {
+    text-align: center;
+    background: linear-gradient(135deg, #49f4c8, #7c3aed);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    font-size: 2.5rem;
+    font-weight: 800;
+    margin-bottom: 0.5rem;
+}
+.sub-title {
+    text-align: center;
+    color: #a0abc2;
+    font-size: 1.1rem;
+    margin-bottom: 2rem;
+}
+"""
+# Build the UI: header, upload + process button, result panels, then the
+# semantic-search section. Event wiring is at the bottom of the block.
+with gr.Blocks(
+    theme=gr.themes.Base(
+        primary_hue=gr.themes.colors.emerald,
+        secondary_hue=gr.themes.colors.purple,
+        neutral_hue=gr.themes.colors.slate,
+        font=gr.themes.GoogleFont("Inter"),
+    ),
+    css=CUSTOM_CSS,
+    title="ArabEdu",
+) as demo:
+    # Page header; the CSS classes are defined in CUSTOM_CSS.
+    gr.HTML(
+        """
+        <div class="main-title">ArabEdu</div>
+        <div class="sub-title">
+            نظام فهم المحاضرات العربية — حوّل محاضراتك الصوتية إلى نصوص ذكية وملخصات دقيقة
+        </div>
+        """
+    )
+    # Audio source. type="filepath" means process_audio receives a path string.
+    with gr.Row():
+        audio_input = gr.Audio(
+            label="📁 رفع الملف الصوتي",
+            type="filepath",
+            sources=["upload", "microphone"],
+        )
+    process_btn = gr.Button(
+        "🚀 معالجة الملف الصوتي",
+        variant="primary",
+        size="lg",
+    )
+    # Read-only result widgets, filled by process_audio in the order listed
+    # in process_btn.click(outputs=...) below.
+    with gr.Row():
+        emotion_output = gr.Textbox(
+            label="🎭 تحليل المشاعر الصوتية",
+            interactive=False,
+            scale=1,
+        )
+    with gr.Row():
+        # Wide column: full transcript.
+        with gr.Column(scale=2):
+            transcript_output = gr.Textbox(
+                label="📝 النص الكامل",
+                interactive=False,
+                lines=10,
+                rtl=True,
+            )
+        # Narrow column: summary and detected keywords.
+        with gr.Column(scale=1):
+            summary_output = gr.Textbox(
+                label="📋 الملخص",
+                interactive=False,
+                lines=6,
+                rtl=True,
+            )
+            keywords_output = gr.Textbox(
+                label="🔑 الكلمات المفتاحية",
+                interactive=False,
+                lines=3,
+                rtl=True,
+            )
+    # Status line reporting how many segments were indexed.
+    seg_info_output = gr.Textbox(
+        label="فهرسة",
+        interactive=False,
+        visible=True,
+    )
+    gr.Markdown("---")
+    gr.Markdown("### 🔍 البحث الدلالي في المحتوى")
+    # Semantic search over the segments indexed by the last processed file.
+    with gr.Row():
+        search_input = gr.Textbox(
+            label="ابحث عن موضوع معين في التسجيل",
+            placeholder="مثال: ما هو الذكاء الاصطناعي؟",
+            scale=4,
+            rtl=True,
+        )
+        search_btn = gr.Button("🔍 بحث", variant="secondary", scale=1)
+    search_output = gr.Markdown(label="نتائج البحث", rtl=True)
+    # Event wiring. The outputs order must match process_audio's return tuple.
+    process_btn.click(
+        fn=process_audio,
+        inputs=[audio_input],
+        outputs=[
+            emotion_output,
+            transcript_output,
+            summary_output,
+            keywords_output,
+            seg_info_output,
+        ],
+    )
+    # Clicking the button and pressing Enter in the box both trigger a search.
+    search_btn.click(
+        fn=do_search,
+        inputs=[search_input],
+        outputs=[search_output],
+    )
+    search_input.submit(
+        fn=do_search,
+        inputs=[search_input],
+        outputs=[search_output],
+    )
+# Enable request queuing, then start the app at import time.
+demo.queue()
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+transformers
+torch
+accelerate
+faiss-cpu
+sentencepiece
+sentence-transformers
+librosa
+gradio>=4.0