Update app.py

app.py CHANGED

@@ -5,6 +5,7 @@ import logging
 from typing import List
 import re
 import numpy as np
+import os
 
 # Configure advanced logging
 logging.basicConfig(
@@ -13,29 +14,65 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 
-model =
+# Initialize variables
+model = None
+knowledge_chunks = []
+knowledge_embeddings = None
+
+def initialize_components():
+    """Initialize model and knowledge base with error handling"""
+    global model, knowledge_chunks, knowledge_embeddings
+
+    # Model loading with fallback
+    try:
+        model = SentenceTransformer(
+            "CAMeL-Lab/bert-base-arabic-camelbert-ca",
+            device="cuda" if torch.cuda.is_available() else "cpu"
+        )
+        logger.info(f"Model loaded on device: {model.device}")
+    except Exception as e:
+        logger.error(f"Model loading failed: {str(e)}")
+        raise RuntimeError("Failed to initialize the AI model")
+
+    # Knowledge base loading
+    try:
+        knowledge_chunks, _ = load_knowledge()
+        if not knowledge_chunks:
+            raise ValueError("No knowledge chunks loaded - check knowledge.txt")
+
+        knowledge_embeddings = model.encode(knowledge_chunks, convert_to_tensor=True)
+        logger.info(f"Successfully loaded {len(knowledge_chunks)} knowledge chunks")
+    except Exception as e:
+        logger.error(f"Knowledge base loading failed: {str(e)}")
+        raise RuntimeError("Failed to initialize knowledge base")
 
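Because a failed startup initialization is only logged (see the try/except near the bottom of the diff), `answer_question` re-runs `initialize_components()` on demand, so the Space can recover after a transient load error. A minimal sanity check of the loaded globals, assuming `knowledge.txt` sits next to `app.py` and the definitions above are in scope; the 768 is the usual hidden size of a `bert-base` checkpoint, an assumption about this model:

```python
# Quick sanity check of the init path (assumes knowledge.txt exists
# and the functions from the diff above are already defined/imported).
initialize_components()
print(model.device)                # cuda or cpu, whichever was selected
print(len(knowledge_chunks))       # number of chunks parsed from knowledge.txt
print(knowledge_embeddings.shape)  # e.g. torch.Size([N, 768]) for a bert-base model
```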
-# Advanced knowledge loader with semantic organization
 def load_knowledge():
+    """Load and process knowledge file with validation"""
+    try:
+        if not os.path.exists("knowledge.txt"):
+            raise FileNotFoundError("knowledge.txt file not found")
+
+        with open("knowledge.txt", "r", encoding="utf-8") as f:
+            content = f.read().strip()
+            if not content:
+                raise ValueError("knowledge.txt is empty")
+
+        # Process knowledge file
         sections = {}
         current_section = ""
 
+        with open("knowledge.txt", "r", encoding="utf-8") as f:
+            for line in f:
+                line = line.strip()
+                if line.startswith("## "):
+                    current_section = line[3:]
+                    sections[current_section] = []
+                elif line and current_section:
+                    sections[current_section].append(line)
+
         chunks = []
         chunk_ids = []
+
         for section, content in sections.items():
             section_text = " ".join(content)
             sentences = re.split(r'[\.\n]', section_text)
@@ -58,65 +95,87 @@ def load_knowledge():
             chunk_ids.append(section)
 
         return chunks, chunk_ids
+
+    except Exception as e:
+        logger.error(f"Error loading knowledge: {str(e)}")
+        raise
 
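For reference, the loader keeps only non-empty lines that follow a `## ` heading (anything before the first heading is dropped), so `knowledge.txt` is expected to look roughly like this; the section names and text below are invented placeholders, not the Space's actual file:

```text
## الشفافية
الشفافية تعني إتاحة المعلومات للجمهور بشكل واضح وقابل للوصول.

## الموازنة التشاركية
الموازنة التشاركية آلية تتيح للمواطنين المشاركة في توزيع الموارد العامة.
```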
-knowledge_chunks, chunk_categories = load_knowledge()
-knowledge_embeddings = model.encode(knowledge_chunks, convert_to_tensor=True)
-
-# Advanced Arabic response generator
 def generate_arabic_response(question, top_chunks):
+    """Generate response with validation"""
+    try:
+        if not top_chunks:
+            return "لم أجد معلومات كافية للإجابة على سؤالك"
+
+        response = "المساعد الآلي لوحدة الشفافية\n\n"
+
+        # Analyze question type
+        question_type = "عام"
+        q_words = question.split()
+
+        if any(w in ["كيف", "طريقة", "خطوات"] for w in q_words):
+            question_type = "إجرائي"
+        elif any(p in question for p in ["ما هي", "ما هو", "تعريف"]):
+            # Phrases are matched against the whole question: a two-word
+            # phrase like "ما هي" can never equal a single token of q_words
+            question_type = "تعريفي"
+        elif any(w in ["لماذا", "سبب", "أسباب"] for w in q_words):
+            question_type = "تفسيري"
+
+        # Generate context-aware response
+        if question_type == "تعريفي":
+            response += "بناءً على سؤالك عن المفاهيم الأساسية:\n\n"
+        elif question_type == "إجرائي":
+            response += "لتنفيذ ما تبحث عنه، إليك الخطوات العملية:\n\n"
+        else:
+            response += "إليك الإجابة الشاملة على سؤالك:\n\n"
+
+        # Build comprehensive answer
+        used_sections = set()
+        for chunk, score in top_chunks:
+            section = chunk.split(":")[0]
+            if section not in used_sections and score > 0.35:
+                response += f"• {chunk}\n\n"
+                used_sections.add(section)
+
+        # Add intelligent follow-up
+        if len(used_sections) > 1:
+            response += "\nهذه المعلومات مترابطة حيث أن "
+            response += " و".join(list(used_sections)[:3]) + " جوانب متكاملة."
+
+        return response
+
+    except Exception as e:
+        logger.error(f"Error generating response: {str(e)}")
+        return "حدث خطأ أثناء توليد الإجابة"
 
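The definitional branch above uses a substring test against the whole question because `question.split()` yields single tokens, so a two-word phrase such as "ما هي" can never equal one token. A quick demonstration of the difference:

```python
# Token-by-token membership test: never matches two-word phrases.
q = "ما هي الشفافية"
print(any(w in ["ما هي", "ما هو", "تعريف"] for w in q.split()))  # False

# Substring test against the whole question string.
print(any(p in q for p in ["ما هي", "ما هو", "تعريف"]))          # True
```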
 def answer_question(question):
+    """Main question answering function with comprehensive error handling"""
     try:
         if not question.strip():
             return "الرجاء إدخال سؤال واضح ومحدد"
 
+        # Validate components are loaded
+        if model is None or not knowledge_chunks:
+            initialize_components()
+
         # Arabic question preprocessing
         question = re.sub(r'[؟\?]', '', question).strip()
+        logger.info(f"Processing question: '{question}'")
+
+        # Encode question
         question_embedding = model.encode(question, convert_to_tensor=True)
+        logger.info("Question encoded successfully")
 
+        # Semantic search
         cos_scores = util.cos_sim(question_embedding, knowledge_embeddings)[0]
         top_k = min(5, len(knowledge_chunks))
-        # Get diverse results from different sections
         top_indices = torch.topk(cos_scores, k=top_k).indices.tolist()
+
+        top_chunks = [
+            (knowledge_chunks[idx], cos_scores[idx].item())
+            for idx in top_indices
+            if cos_scores[idx] > 0.3
+        ]
+
+        logger.info(f"Found {len(top_chunks)} relevant chunks (max score: {max(cos_scores).item():.2f})")
 
         if not top_chunks:
             return "لم أجد إجابة دقيقة، لكن يمكنك:\n- صياغة السؤال بطريقة أخرى\n- الرجوع للوثائق الرسمية"
@@ -124,9 +183,15 @@ def answer_question(question):
         return generate_arabic_response(question, top_chunks)
 
     except Exception as e:
-        logger.error(f"Error: {str(e)}")
+        logger.error(f"Error answering question: {str(e)}", exc_info=True)
         return "حدث خطأ تقني، يرجى المحاولة لاحقاً"
 
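For reviewers who want to exercise the retrieval step in isolation, a standalone sketch of the same `util.cos_sim`/`torch.topk` flow. The two chunks are invented sample data; note that loading a plain BERT checkpoint through `SentenceTransformer` attaches a default mean-pooling head, since this is not a dedicated sentence-transformers model:

```python
# Standalone sketch of the semantic search used in answer_question.
import torch
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("CAMeL-Lab/bert-base-arabic-camelbert-ca")
chunks = [
    "الشفافية: تعريف الشفافية وأهميتها",    # invented sample chunk
    "الموازنة التشاركية: خطوات المشاركة",   # invented sample chunk
]
embeddings = model.encode(chunks, convert_to_tensor=True)

query = model.encode("كيف أشارك في الموازنة التشاركية", convert_to_tensor=True)
scores = util.cos_sim(query, embeddings)[0]   # one cosine score per chunk
top = torch.topk(scores, k=min(5, len(chunks)))
hits = [
    (chunks[i], s)
    for s, i in zip(top.values.tolist(), top.indices.tolist())
    if s > 0.3                                # same threshold as the app
]
print(hits)                                   # [(chunk, score), ...]
```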
+# Initialize components when starting
+try:
+    initialize_components()
+except Exception as e:
+    logger.error(f"Initialization failed: {str(e)}")
+
 # Modern Arabic UI
 css = """
 .arabic-ui {
@@ -139,6 +204,12 @@ css = """
     color: white;
     padding: 20px;
     border-radius: 8px;
+    margin-bottom: 20px;
+}
+.footer {
+    margin-top: 20px;
+    font-size: 0.9em;
+    color: #666;
 }
 """
 
@@ -151,8 +222,16 @@ with gr.Blocks(css=css) as demo:
     </div>
     """)
 
-    question = gr.Textbox(
+    question = gr.Textbox(
+        label="اكتب سؤالك هنا",
+        placeholder="مثال: ما هي مراحل الموازنة التشاركية؟",
+        lines=3
+    )
+    answer = gr.Textbox(
+        label="الإجابة",
+        interactive=False,
+        lines=10
+    )
 
     gr.Examples(
         examples=[
@@ -160,10 +239,22 @@ with gr.Blocks(css=css) as demo:
             ["كيف يمكن المشاركة في الموازنة التشاركية؟"],
             ["ما دور ديوان المحاسبة في تحقيق الشفافية؟"]
         ],
-        inputs=question
+        inputs=question,
+        label="أسئلة مثالية"
     )
 
     submit = gr.Button("الحصول على إجابة ذكية")
     submit.click(answer_question, inputs=question, outputs=answer)
+
+    gr.Markdown("""
+    <div class="footer">
+    <p>لأي استفسارات تقنية، يرجى التواصل مع فريق الدعم</p>
+    </div>
+    """)
 
+# Launch with error handling
+try:
+    demo.launch(server_name="0.0.0.0", server_port=7860)
+except Exception as e:
+    logger.error(f"Failed to launch app: {str(e)}")
+    print(f"Failed to launch app: {str(e)}")
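One variant worth considering, as an assumption on my part and not part of this commit: guarding the launch so that `answer_question` can be imported by tests without starting the server. The `0.0.0.0`/`7860` values match what Hugging Face Spaces expects, so they stay unchanged:

```python
# Hypothetical variant of the launch block above: only start the Gradio
# server when app.py is executed directly, so imports stay side-effect free.
if __name__ == "__main__":
    try:
        demo.launch(server_name="0.0.0.0", server_port=7860)
    except Exception as e:
        logger.error(f"Failed to launch app: {str(e)}")
```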