Spaces:

mo-456
/

FTCE-chatbot

Sleeping

App Files Files Community

mo-456 committed on Jul 15, 2025

Commit

9717c6f

verified ·

1 Parent(s): a9c2d55

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -265

app.py CHANGED Viewed

@@ -3,255 +3,142 @@ from sentence_transformers import SentenceTransformer, util
 import torch
 import logging
 from typing import List
-# Set up logging
-logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# Load model
-logger.info("Loading model...")
-try:
-    model = SentenceTransformer(
-        "CAMeL-Lab/bert-base-arabic-camelbert-ca",
-        device="cpu",
-        cache_folder="./model_cache"
-    )
-except Exception as e:
-    logger.error(f"Failed to load model: {e}")
-    raise
-# Load knowledge file
-logger.info("Loading knowledge file...")
-try:
     with open("knowledge.txt", "r", encoding="utf-8") as f:
-        knowledge_text = f.read()
-    if not knowledge_text.strip():
-        raise ValueError("knowledge.txt is empty")
-except Exception as e:
-    logger.error(f"Error reading knowledge.txt: {e}")
-    raise
-def split_text(text: str, chunk_size: int = 300) -> List[str]:
-    """Split text into meaningful semantic chunks"""
-    sections = []
-    current_section = []
-    current_length = 0
-    for section in text.split("\n\n---\n\n"):
-        section = section.strip()
-        if not section:
-            continue
-        for para in section.split("\n\n"):
-            para = para.strip()
-            if not para:
-                continue
-            if current_length + len(para) > chunk_size and current_section:
-                sections.append("\n\n".join(current_section))
-                current_section = []
-                current_length = 0
-            current_section.append(para)
-            current_length += len(para)
-    if current_section:
-        sections.append("\n\n".join(current_section))
-    return sections
-logger.info("Generating embeddings...")
-try:
-    chunks = split_text(knowledge_text)
-    corpus_embeddings = model.encode(chunks, convert_to_tensor=True, batch_size=8)
-except Exception as e:
-    logger.error(f"Error generating embeddings: {e}")
-    raise
-def generate_comprehensive_answer(question: str, top_chunks: List[tuple]) -> str:
-    """Generate a well-structured, comprehensive answer"""
-    # Header with unit name
-    answer = "## المساعد الآلي لوحدة الشفافية\n\n"
-    # Add introduction based on question type
-    if "؟" in question:
-        answer += "بناءً على سؤالك، إليك الإجابة التفصيلية:\n\n"
     else:
-        answer += "فيما يلي المعلومات المطلوبة:\n\n"
-    # Add content from relevant chunks
     for chunk, score in top_chunks:
-        if score > 0.4:  # Only include good matches
-            answer += f"{chunk}\n\n"
-    # Add confidence indication
-    best_score = top_chunks[0][1] if top_chunks else 0
-    confidence = "عالية جدًا" if best_score > 0.8 else "عالية" if best_score > 0.6 else "متوسطة"
-    answer += f"\n**مستوى الدقة:** {confidence} ({best_score:.2f})"
-    # Add footer with suggestions
-    answer += "\n\nلمزيد من المعلومات، يمكنك:\n"
-    answer += "- طرح سؤال أكثر تحديدًا\n"
-    answer += "- الرجوع إلى الوثائق الرسمية\n"
-    answer += "- التواصل مع وحدة الشفافية مباشرة"
-    return answer
-def answer_question(question: str) -> str:
     try:
         if not question.strip():
-            return "## المساعد الآلي لوحدة الشفافية\n\nالرجاء إدخال سؤال واضح ومحدد."
-        # Preprocess question
-        question = question.strip().replace("؟", "").strip()
-        # Get embeddings
         question_embedding = model.encode(question, convert_to_tensor=True)
-        scores = util.cos_sim(question_embedding, corpus_embeddings)[0]
-        # Get top relevant chunks
-        top_k = min(5, len(chunks))
-        top_results = torch.topk(scores, k=top_k)
-        # Prepare chunks with scores
-        relevant_chunks = [
-            (chunks[idx], score.item())
-            for score, idx in zip(top_results[0], top_results[1])
-            if score > 0.3  # Minimum relevance threshold
-        ]
-        if relevant_chunks:
-            return generate_comprehensive_answer(question, relevant_chunks)
-        else:
-            return "## المساعد الآلي لوحدة الشفافية\n\nعذرًا، لم أتمكن من العثور على إجابة دقيقة. يرجى:\n- صياغة السؤال بطريقة أخرى\n- استخدام مصطلحات مختلفة\n- تقديم مزيد من التفاصيل في سؤالك"
     except Exception as e:
-        logger.error(f"Error answering question: {e}")
-        return "## المساعد الآلي لوحدة الشفافية\n\nحدث خطأ غير متوقع. يرجى المحاولة مرة أخرى أو التواصل مع الدعم الفني."
-# Sleek Black and White UI
 css = """
-body {
-    background-color: #000000 !important;
-    color: #ffffff !important;
-}
 .arabic-ui {
     direction: rtl;
     text-align: right;
-    font-family: 'Tahoma', 'Arial', sans-serif;
-    background-color: #000000;
-    color: #ffffff;
 }
 .header {
-    background-color: #000000;
-    color: #ffffff;
     padding: 20px;
     border-radius: 8px;
-    margin-bottom: 20px;
-    border-bottom: 2px solid #ffffff;
-}
-.answer-container {
-    background-color: #121212;
-    color: #ffffff;
-    padding: 25px;
-    border-radius: 10px;
-    border-right: 3px solid #ffffff;
-    margin-bottom: 20px;
-}
-.question-input {
-    background-color: #121212;
-    color: #ffffff;
-    border: 2px solid #333333;
-    border-radius: 8px;
-    padding: 15px;
-    font-size: 16px;
-    min-height: 120px;
-}
-.question-input:focus {
-    border-color: #ffffff;
-    box-shadow: 0 0 0 2px rgba(255, 255, 255, 0.2);
-}
-.submit-btn {
-    background-color: #333333;
-    color: #ffffff !important;
-    border: 1px solid #ffffff;
-    padding: 12px 30px;
-    font-size: 16px;
-    border-radius: 8px;
-    transition: all 0.3s;
-}
-.submit-btn:hover {
-    background-color: #555555;
-    transform: translateY(-2px);
-}
-.clear-btn {
-    background-color: #333333;
-    color: #ffffff !important;
-    border: 1px solid #ffffff;
-    padding: 12px 30px;
-    font-size: 16px;
-    border-radius: 8px;
-    transition: all 0.3s;
-}
-.clear-btn:hover {
-    background-color: #555555;
-    transform: translateY(-2px);
-}
-.examples-container {
-    background-color: #121212;
-    padding: 15px;
-    border-radius: 8px;
-    margin-bottom: 20px;
-    border: 1px solid #333333;
-}
-.examples-label {
-    color: #ffffff;
-    font-weight: bold;
-    margin-bottom: 10px;
-}
-.example-btn {
-    background-color: #333333;
-    color: #ffffff;
-    border: 1px solid #555555;
-    margin: 5px;
-    border-radius: 6px;
-    transition: all 0.2s;
-}
-.example-btn:hover {
-    background-color: #555555;
-}
-.markdown-text {
-    color: #ffffff;
-    line-height: 1.8;
-    font-size: 16px;
-}
-.markdown-text h1, .markdown-text h2, .markdown-text h3 {
-    color: #ffffff;
-    margin-top: 20px;
-    margin-bottom: 15px;
-}
-.markdown-text strong {
-    color: #dddddd;
-}
-label {
-    color: #ffffff !important;
 }
 """
@@ -259,58 +146,24 @@ with gr.Blocks(css=css) as demo:
     with gr.Column(elem_classes="arabic-ui"):
         gr.Markdown("""
         <div class="header">
-        <h1 style="text-align: center; margin-bottom: 10px;">المساعد الآلي لوحدة الشفافية</h1>
-        <h3 style="text-align: center; font-weight: normal;">نظام الإجابة الآلي للاستفسارات المتعلقة بالشفافية والموازنة التشاركية</h3>
         </div>
         """)
-        with gr.Row():
-            question_input = gr.Textbox(
-                label="نص السؤال",
-                placeholder="مثال: ما هي آليات المشاركة المجتمعية في الموازنة التشاركية؟",
-                lines=3,
-                max_lines=5,
-                elem_classes="question-input"
-            )
-        answer_output = gr.Markdown(
-            label="الإجابة",
-            elem_classes=["answer-container", "markdown-text"]
         )
-        with gr.Column(elem_classes="examples-container"):
-            gr.Markdown("أسئلة نموذجية", elem_classes="examples-label")
-            examples = gr.Examples(
-                examples=[
-                    ["ما هي الركائز الأساسية للنموذج المصري للموازنة التشاركية؟"],
-                    ["كيف تقيس وحدة الشفافية مستوى رضا المواطنين؟"],
-                    ["ما هي أحدث إحصائيات مؤشر شفافية الموازنة المفتوحة؟"]
-                ],
-                inputs=question_input,
-                elem_id="example-buttons",
-                examples_per_page=3
-            )
-        with gr.Row():
-            submit_btn = gr.Button("الحصول على الإجابة",
-                                 elem_classes="submit-btn")
-            clear_btn = gr.Button("مسح النموذج",
-                                elem_classes="clear-btn")
-        submit_btn.click(
-            fn=answer_question,
-            inputs=question_input,
-            outputs=answer_output
-        )
-        clear_btn.click(
-            lambda: ("", ""),
-            inputs=None,
-            outputs=[question_input, answer_output]
-        )
-demo.launch(
-    server_name="0.0.0.0",
-    server_port=7860,
-    show_error=True
-)

 import torch
 import logging
 from typing import List
+import re
+import numpy as np
+# Configure advanced logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
 logger = logging.getLogger(__name__)
+# Load model with enhanced settings
+model = SentenceTransformer(
+    "CAMeL-Lab/bert-base-arabic-camelbert-ca",
+    device="cuda" if torch.cuda.is_available() else "cpu"
+)
+# Advanced knowledge loader with semantic organization
def load_knowledge(path: str = "knowledge.txt", max_chunk_chars: int = 200):
    """Read the knowledge file and split it into section-labelled chunks.

    The file is scanned line by line. A line starting with ``"## "`` opens a
    section named by the rest of that line; every following non-empty line is
    collected under it. Lines that appear before the first heading are
    ignored. A heading that occurs more than once keeps accumulating into the
    same section instead of discarding what was collected earlier.

    Each section's lines are joined with spaces, split into sentences on
    ``.``, and packed greedily into chunks: a sentence is added to the
    current chunk while the chunk length plus the sentence length stays below
    ``max_chunk_chars``; otherwise the current chunk is flushed first. A
    single sentence longer than the limit becomes a chunk of its own.

    Args:
        path: Location of the UTF-8 knowledge file.
        max_chunk_chars: Length threshold used when packing sentences.

    Returns:
        A ``(chunks, chunk_ids)`` pair of equal-length lists. Every chunk has
        the form ``"<section>: <text>"`` and ``chunk_ids[i]`` is the section
        name that ``chunks[i]`` was built from.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    sections = {}
    current_section = ""
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line.startswith("## "):
                current_section = line[3:].strip()
                # setdefault keeps earlier content when a heading repeats.
                sections.setdefault(current_section, [])
            elif line and current_section:
                sections[current_section].append(line)

    chunks = []
    chunk_ids = []
    for section, content in sections.items():
        current_chunk = ""
        for sent in re.split(r'[\.\n]', " ".join(content)):
            sent = sent.strip()
            if not sent:
                continue
            # Flush only a non-empty chunk: an over-long first sentence must
            # not produce a chunk that contains nothing but the section name.
            if current_chunk and len(current_chunk) + len(sent) >= max_chunk_chars:
                chunks.append(f"{section}: {current_chunk.strip()}")
                chunk_ids.append(section)
                current_chunk = ""
            current_chunk += sent + ". "
        if current_chunk:
            chunks.append(f"{section}: {current_chunk.strip()}")
            chunk_ids.append(section)
    return chunks, chunk_ids
+knowledge_chunks, chunk_categories = load_knowledge()
+knowledge_embeddings = model.encode(knowledge_chunks, convert_to_tensor=True)
+# Advanced Arabic response generator
def generate_arabic_response(question, top_chunks):
    """Assemble the Arabic answer text from the retrieved chunks.

    The question is classified by looking for marker words or phrases, and
    the classification selects the introductory sentence. Chunks are then
    listed as bullets in the order given, at most one per section and only
    when their score is above 0.35. When more than one section was used, a
    closing sentence names up to three of them in the order they were listed.

    Args:
        question: The user's question as a string.
        top_chunks: Iterable of ``(chunk, score)`` pairs. The section name is
            taken as the text before the first ``":"`` in ``chunk``.

    Returns:
        The formatted response string.
    """
    # Pad the whitespace-normalised question so that every marker, including
    # multi-word ones such as "ما هي", is matched on whole-word boundaries.
    # Comparing such phrases against single tokens can never succeed.
    padded_question = f" {' '.join(question.split())} "

    def mentions(markers):
        return any(f" {marker} " in padded_question for marker in markers)

    if mentions(("كيف", "طريقة", "خطوات")):
        question_type = "إجرائي"
    elif mentions(("ما هي", "ما هو", "تعريف")):
        question_type = "تعريفي"
    elif mentions(("لماذا", "سبب", "أسباب")):
        question_type = "تفسيري"
    else:
        question_type = "عام"

    parts = ["المساعد الآلي لوحدة الشفافية\n\n"]
    if question_type == "تعريفي":
        parts.append("بناءً على سؤالك عن المفاهيم الأساسية:\n\n")
    elif question_type == "إجرائي":
        parts.append("لتنفيذ ما تبحث عنه، إليك الخطوات العملية:\n\n")
    else:
        parts.append("إليك الإجابة الشاملة على سؤالك:\n\n")

    # A list keeps the sections in the order they were used, so the closing
    # sentence is the same on every run; the set gives fast membership tests.
    ordered_sections = []
    seen_sections = set()
    for chunk, score in top_chunks:
        section = chunk.partition(":")[0]
        if section not in seen_sections and score > 0.35:
            parts.append(f"• {chunk}\n\n")
            seen_sections.add(section)
            ordered_sections.append(section)

    if len(ordered_sections) > 1:
        parts.append("\nهذه المعلومات مترابطة حيث أن ")
        parts.append(" و".join(ordered_sections[:3]) + " جوانب متكاملة.")
    return "".join(parts)
def answer_question(question):
    """Answer a question by semantic search over the knowledge chunks.

    Arabic and Latin question marks are removed from the question. If nothing
    but whitespace remains (or the input is empty or ``None``), a prompt
    asking for a clear question is returned without running the model.
    Otherwise the question is embedded, compared with
    ``knowledge_embeddings`` by cosine similarity, and the up-to-five best
    chunks scoring above 0.3 are passed to ``generate_arabic_response``.

    Args:
        question: The raw text entered by the user.

    Returns:
        The generated response, a prompt for a clearer question, a hint
        message when no chunk scores above 0.3, or a generic error message
        when any exception is raised while answering.
    """
    try:
        # Strip question marks before the emptiness check so an input such as
        # "؟" is rejected here instead of being embedded as an empty string.
        cleaned = re.sub(r'[؟\?]', '', question or '').strip()
        if not cleaned:
            return "الرجاء إدخال سؤال واضح ومحدد"

        question_embedding = model.encode(cleaned, convert_to_tensor=True)
        cos_scores = util.cos_sim(question_embedding, knowledge_embeddings)[0]

        top_k = min(5, len(knowledge_chunks))
        best = torch.topk(cos_scores, k=top_k)
        top_chunks = [
            (knowledge_chunks[idx], score)
            for score, idx in zip(best.values.tolist(), best.indices.tolist())
            if score > 0.3
        ]
        if not top_chunks:
            return "لم أجد إجابة دقيقة، لكن يمكنك:\n- صياغة السؤال بطريقة أخرى\n- الرجوع للوثائق الرسمية"
        return generate_arabic_response(cleaned, top_chunks)
    except Exception:
        # logger.exception records the traceback along with the message.
        logger.exception("Error while answering question")
        return "حدث خطأ تقني، يرجى المحاولة لاحقاً"
+# Modern Arabic UI
 css = """
 .arabic-ui {
     direction: rtl;
     text-align: right;
+    font-family: 'Tahoma', sans-serif;
 }
 .header {
+    background: #2c3e50;
+    color: white;
     padding: 20px;
     border-radius: 8px;
 }
 """
     with gr.Column(elem_classes="arabic-ui"):
         gr.Markdown("""
         <div class="header">
+        <h2>المساعد الآلي لوحدة الشفافية</h2>
+        <p>نظام ذكي لفهم واستجابة استفساراتك باللغة العربية</p>
         </div>
         """)
+        question = gr.Textbox(label="اكتب سؤالك هنا", placeholder="مثال: ما هي مراحل الموازنة التشاركية؟")
+        answer = gr.Textbox(label="الإجابة", interactive=False)
+        gr.Examples(
+            examples=[
+                ["ما هي أهداف التنمية المستدامة الرئيسية؟"],
+                ["كيف يمكن المشاركة في الموازنة التشاركية؟"],
+                ["ما دور ديوان المحاسبة في تحقيق الشفافية؟"]
+            ],
+            inputs=question
         )
+        submit = gr.Button("الحصول على إجابة ذكية")
+        submit.click(answer_question, inputs=question, outputs=answer)
+demo.launch(server_name="0.0.0.0", server_port=7860)