Update app.py
app.py CHANGED
@@ -1,46 +1,40 @@
 import gradio as gr
 from sentence_transformers import SentenceTransformer, util
+import faiss
+import numpy as np
 import torch
 import logging
 import re
 import os
 from typing import List, Tuple

-# إعداد التسجيل
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)

-# تحميل نموذج
-    logger.info("تم تحميل النموذج بنجاح")
-except Exception as e:
-    logger.error(f"فشل تحميل النموذج: {e}")
-    raise
+# تحميل النموذج
+model = SentenceTransformer("UBC-NLP/MARBERT", device="cuda" if torch.cuda.is_available() else "cpu")
+logger.info("✅ النموذج محمّل")

-# تحميل المعرفة
-    return ["عام: لا يوجد محتوى معرفي متاح."]
-    chunks = []
-    current_section = "عام"
+# تحميل وتنظيف المعرفة
+knowledge_chunks = []
+section = "عام"
+if os.path.exists("knowledge.txt"):
     with open("knowledge.txt", encoding="utf-8") as f:
         for line in f:
             line = line.strip()
             if line.startswith("##"):
+                section = line.replace("##", "").strip()
-            elif line:
+            elif line and len(line) > 10:
+                knowledge_chunks.append(f"{section}: {line}")
+else:
+    knowledge_chunks = ["عام: لا يوجد محتوى معرفي."]
+
+# التضمينات + FAISS
+knowledge_embeddings = model.encode(knowledge_chunks, convert_to_numpy=True, normalize_embeddings=True)
+index = faiss.IndexFlatIP(knowledge_embeddings.shape[1])
+index.add(knowledge_embeddings)
+
+# تنظيف السؤال
 def preprocess_question(question: str) -> str:
     question = re.sub(r'[؟\?،,\.]', '', question).strip()
     replacements = {
@@ -56,18 +50,23 @@ def preprocess_question(question: str) -> str:
     question = re.sub(pattern, repl, question)
     return question

+# البحث الدلالي
+def semantic_search(question: str, top_k=5) -> List[Tuple[str, float]]:
+    embedding = model.encode(question, convert_to_numpy=True, normalize_embeddings=True)
+    scores, indices = index.search(np.array([embedding]), top_k)
+    results = []
+    for i, score in zip(indices[0], scores[0]):
+        if score > 0.6:
+            results.append((knowledge_chunks[i], float(score)))
+    return results
+
 # توليد الرد
-def generate_response(question: str,
-    if not
-        "أعد صياغة سؤالك.",
-        "جرّب استخدام كلمات أخرى.",
-        "ابحث في قسم مختلف من المعرفة."
-    ]
-    return "لم يتم العثور على إجابة دقيقة.\n\nاقتراحات:\n" + "\n".join(f"- {s}" for s in suggestions)
+def generate_response(question: str, results: List[Tuple[str, float]]) -> str:
+    if not results:
+        return "لم أتمكن من العثور على إجابة مباشرة. حاول إعادة صياغة سؤالك."

     sections = {}
-    for chunk, score in
+    for chunk, score in results:
         if ":" in chunk:
             section, content = chunk.split(":", 1)
         else:
@@ -77,68 +76,39 @@ def generate_response(question: str, top_chunks: List[Tuple[str, float]]) -> str
         sections[section].append((content.strip(), score))

     main_section = max(sections, key=lambda s: sum(x[1] for x in sections[s]) / len(sections[s]))
     response = f"سؤالك: {question}\n\n"
-    response += f"{main_section}:
+    response += f"{main_section}:\n"
     for content, _ in sorted(sections[main_section], key=lambda x: x[1], reverse=True):
+        if len(content) > 15:
+            response += f"- {content}\n"
-    other_sections = [s for s in sections if s != main_section]
-    if other_sections:
-        response += "\nمعلومات إضافية:\n"
-        for section in other_sections[:2]:
-            response += f"\nمن {section}:\n"
-            for content, _ in sorted(sections[section], key=lambda x: x[1], reverse=True)[:2]:
-                response += f"- {content}\n"

     return response

-# ال
-def answer_question(
-    if not
-        return "يرجى إدخال سؤال واضح
-    try:
-        q_embedding = model.encode(question, convert_to_tensor=True)
-        cos_scores = util.cos_sim(q_embedding, knowledge_embeddings)[0]
-        top_k = min(5, len(knowledge_chunks))
-        top_results = torch.topk(cos_scores, k=top_k)
-
-        top_chunks = [
-            (knowledge_chunks[idx], score.item())
-            for idx, score in zip(top_results.indices, top_results.values)
-            if score.item() > 0.6
-        ]
-
-        if not top_chunks:
-            with open("unanswered.txt", "a", encoding="utf-8") as f:
-                f.write(f"{question}\n")
-
-        return generate_response(question, top_chunks)
-
-    except Exception as e:
-        logger.error(f"خطأ أثناء توليد الإجابة: {e}")
-        return "حدث خطأ غير متوقع أثناء توليد الإجابة."
+# دالة الاستجابة
+def answer_question(q: str) -> str:
+    if not q or len(q.strip()) < 3:
+        return "يرجى إدخال سؤال واضح مكون من 3 كلمات أو أكثر."
+    q_clean = preprocess_question(q)
+    results = semantic_search(q_clean)
+    return generate_response(q_clean, results)

 # واجهة Gradio
 with gr.Blocks(css=".arabic-ui {direction: rtl; text-align: right; font-family: Tahoma;}") as demo:
     with gr.Column(elem_classes="arabic-ui"):
-        gr.Markdown("
-        gr.
-        ],
-        )
+        gr.Markdown("""
+        ### المساعد الذكي للموازنة التشاركية
+        اطرح سؤالك حول وحدة الشفافية أو المشاريع أو المؤشرات الدولية.
+        """)
+        question = gr.Textbox(label="سؤالك", placeholder="مثال: من هي رئيسة وحدة الشفافية؟", lines=3)
+        submit = gr.Button("إرسال")
+        answer = gr.Textbox(label="الإجابة", lines=10, interactive=False)
+        gr.Examples([
+            ["من هي رئيسة وحدة الشفافية؟"],
+            ["ما هي أهداف الموازنة التشاركية؟"],
+            ["كم حصلت مصر في مؤشر الشفافية؟"]
+        ], inputs=question)
+        submit.click(fn=answer_question, inputs=question, outputs=answer)

 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
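
The headline change in this commit is replacing the old torch.topk cosine search with a FAISS inner-product index over normalized embeddings (semantic_search above). As a quick sanity check of that equivalence — a minimal sketch, not part of the commit, using made-up random vectors — faiss.IndexFlatIP over L2-normalized rows returns cosine similarities, which is why the 0.6 cutoff in semantic_search acts as a cosine-similarity threshold:

import faiss
import numpy as np

# Made-up data: four random vectors, L2-normalized so the inner product
# of any two rows equals their cosine similarity.
rng = np.random.default_rng(0)
vecs = rng.normal(size=(4, 8)).astype("float32")
vecs /= np.linalg.norm(vecs, axis=1, keepdims=True)

index = faiss.IndexFlatIP(vecs.shape[1])   # exact inner-product index
index.add(vecs)

query = vecs[:1]                           # query with a vector already in the index
scores, ids = index.search(query, 2)       # returns (similarities, indices)
print(ids[0], scores[0])                   # best match is the vector itself, score ~= 1.0
assert np.isclose(scores[0][0], 1.0, atol=1e-5)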
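The loader at the top of the new file assumes a knowledge.txt in which lines starting with "##" mark section headers and any following line longer than 10 characters becomes a retrievable chunk of the form "<section>: <line>". The Space's actual knowledge.txt is not part of this diff, so the sample below is invented purely to illustrate the chunk format the FAISS index is built over:

# Hypothetical knowledge.txt content (the real file is not shown in this commit).
sample = """## وحدة الشفافية
وحدة الشفافية مسؤولة عن نشر بيانات الموازنة التشاركية للمواطنين.
## المؤشرات الدولية
تتابع الوحدة ترتيب مصر في مؤشرات الشفافية الدولية.
"""

# Same parsing rules as in app.py: "##" starts a new section, short lines are skipped.
section, chunks = "عام", []
for line in sample.splitlines():
    line = line.strip()
    if line.startswith("##"):
        section = line.replace("##", "").strip()
    elif line and len(line) > 10:
        chunks.append(f"{section}: {line}")

print(chunks)   # two chunks, each prefixed with its section name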