Spaces:

afouda
/

EduNativesChatbot

Runtime error

App Files Community

afouda committed on Sep 15, 2025

Commit

18a171e

verified ·

1 Parent(s): 091d35b

Update app.py

Browse files

Files changed (1) hide show

app.py +98 -19

app.py CHANGED Viewed

@@ -66,7 +66,8 @@ _SKILL_REGEX = re.compile(r"\b(Natural Language Processing|Building Information
 def extract_skills_from_text(cv_text: str) -> List[str]:
     """Return the distinct skills that ``_SKILL_REGEX`` matches in *cv_text*.

     Matches are lower-cased before de-duplication, then passed through
     ``str.capitalize`` (first character upper-case, the rest lower-case).
     The order of the result follows set iteration order.
     """
     seen_lowercase = set()
     for match in _SKILL_REGEX.finditer(cv_text):
         seen_lowercase.add(match.group(0).lower())
     return [skill.capitalize() for skill in seen_lowercase]
 # --- Process uploaded file (PDF, DOCX, TXT) ---
 def process_uploaded_file(file_obj: Any) -> dict | None:
@@ -222,33 +223,110 @@ ensure_collections()
 # -------------------- Query Weaviate --------------------
 def query_weaviate_collection(class_name: str, query_text: str, limit: int = 5) -> List[dict]:
     """Keyword-search a Weaviate collection, falling back to wildcard filters.

     A BM25 query is tried first. If it returns no objects, the collection is
     fetched again with an any-of ``like`` filter on "title" and "description"
     (plus "skills" for every collection other than "Team").

     Returns the ``properties`` of each matching object. Any exception is
     printed and an empty list is returned instead.
     """
     try:
         collection = weaviate_client.collections.get(class_name)
         bm25_response = collection.query.bm25(query=query_text, limit=limit)
         items = [obj.properties for obj in bm25_response.objects]
         if items:
             return items
         # Nothing matched by keyword: retry with substring-style filters.
         pattern = f"*{query_text}*"
         if class_name == "Team":
             searched_props = ("title", "description")
         else:
             searched_props = ("title", "skills", "description")
         like_filter = Filter.any_of(
             [Filter.by_property(prop).like(pattern) for prop in searched_props]
         )
         fallback_response = collection.query.fetch_objects(limit=limit, filters=like_filter)
         return [obj.properties for obj in fallback_response.objects]
     except Exception as e:
         print(f"[Weaviate Query Error] {e}")
         return []
 # -------------------- RAG Prompt Builder --------------------
 def build_rag_prompt(user_question: str, retrieved_items: List[dict], class_name: str) -> str:
     context_parts = []
@@ -811,6 +889,7 @@ with gr.Blocks(css="""
     with gr.Row():
         clear_btn = gr.Button("Reset Conversation")
         instructions = gr.Markdown("Commands: `apply`, `create team`, `join team`, `recommend` — the bot will guide you step-by-step.")
     # persistent state across turns
     chat_history_state = gr.State([])

 def extract_skills_from_text(cv_text: str) -> List[str]:
     """Return the distinct skills that ``_SKILL_REGEX`` matches in *cv_text*.

     Matches are lower-cased so that differently-cased spellings collapse to
     one entry, then each entry is passed through ``str.capitalize`` (first
     character upper-case, the rest lower-case). The order of the result
     follows set iteration order.
     """
     skills = list({m.group(0).lower() for m in _SKILL_REGEX.finditer(cv_text)})
     return [s.capitalize() for s in skills]
 # --- Process uploaded file (PDF, DOCX, TXT) ---
 def process_uploaded_file(file_obj: Any) -> dict | None:
 # -------------------- Query Weaviate --------------------
 def query_weaviate_collection(class_name: str, query_text: str, limit: int = 5) -> List[dict]:
     """
     Run a hybrid (vector + BM25 keyword) query against one Weaviate collection.

     Returns the ``properties`` of every object in the response. Any exception
     is printed and an empty list is returned instead.
     """
     # Only the "Job" collection names the searched properties, with the title
     # weighted higher; every other collection passes None.
     searched_properties = None
     if class_name == "Job":
         searched_properties = ["title^2", "description", "skills"]
     try:
         collection = weaviate_client.collections.get(class_name)
         hybrid_result = collection.query.hybrid(
             query=query_text,
             limit=limit,
             query_properties=searched_properties,
         )
         return [hit.properties for hit in hybrid_result.objects]
     except Exception as e:
         print(f"[Weaviate Query Error] {e}")
         return []
+# -------------------- NEW: Search All Collections --------------------
+# -------------------- RAG Answer (Modified for Multi-Class Search) --------------------
+def rag_answer_all(user_question: str, top_k: int = 3) -> (str, list[dict]):
+    # Step 1: Search across all relevant collections
+    retrieved_items = search_all_collections(user_question, limit_per_class=top_k)
+    if not retrieved_items:
+        return f"Sorry, I couldn't find any results related to '{user_question}' in our Jobs, Projects, or Opportunities databases.", []
+    # Step 2: Build a new prompt that handles multiple sources
+    context_parts = []
+    # Group results by class for clearer presentation in the prompt
+    grouped_results = {}
+    for item in retrieved_items:
+        class_name = item["class_name"]
+        if class_name not in grouped_results:
+            grouped_results[class_name] = []
+        grouped_results[class_name].append(item["properties"])
+    for class_name, items in grouped_results.items():
+        context_parts.append(f"\n--- Results from '{class_name}' collection ---")
+        for i, properties in enumerate(items, 1):
+            details = {k: str(v) for k, v in properties.items()}
+            item_str = f"Record {i}:\n{json.dumps(details, indent=2, ensure_ascii=False)}"
+            context_parts.append(item_str)
+    context_block = "\n".join(context_parts)
+    prompt = f"""
+    User Question: "{user_question}"
+    You are an expert AI assistant. Your mission is to analyze structured data from different categories (Jobs, Projects, Opportunities) and present a comprehensive, clear summary to the user.
+    **Primary Directive:** Your ONLY source of information is the structured JSON data provided below under "Retrieved Data". If the data section is empty, state that no results were found.
+    **Your Core Instructions:**
+    1.  **Acknowledge the Categories:** Analyze all the data provided from each collection (`Job`, `Project`, `Opportunities`).
+    2.  **Summarize Logically:** For each result, **you must clearly state which category it belongs to**. For example, start with "I found a **Job** opportunity:" or "Here is a **Project** you might be interested in:".
+    3.  **Present All Details:** Convert the data for each item into natural, readable language, covering all important details like title, company/creator, description, and skills.
+    4.  **Use Clear Formatting:** Use Markdown headings (e.g., `### Job: [Title]`) and bullet points to make the response easy to read.
+    Retrieved Data:
+    {context_block}
+    """
+    # Step 3: Call the LLM to get the final answer
+    try:
+        resp = llm_client.chat.completions.create(
+            model=MODEL_NAME,
+            messages=[
+                {"role": "system", "content": SYSTEM_PROMPT_BASE},
+                {"role": "user", "content": prompt}
+            ],
+            temperature=0.3,
+            max_tokens=4096
+        )
+        answer = resp.choices[0].message.content or ""
+    except Exception as e:
+        print(f"[RAG LLM Error] {e}")
+        answer = "⚠️ Sorry, I couldn't process that. Try again later."
+    return answer, retrieved_items
+# ========== IDLE STATE ==========
+    if st == "idle":
+        low = text.lower()
+        # ... (The first parts for greetings and flow starters remain the same)
+        # ... (e.g., if any(k in low for k in ["apply",...])
+        # ... (e.g., if any(k in low for k in ["team",...])
+        # ... (e.g., if any(k in low for k in ["recommend",...])
+        # 3) Check for specific Knowledge Base intents
+        intent = route_intent(text)
+        if intent and intent.startswith("kb_"):
+            kb_ans = kb_fallback(intent)
+            if kb_ans:
+                return kb_ans, session, False
+        # 4) If it's not a command or KB question, perform a global RAG search
+        # This is now the default action for any general query.
+        rag_ans, _ = rag_answer_all(text)
+        return rag_ans, session, False
 # -------------------- RAG Prompt Builder --------------------
 def build_rag_prompt(user_question: str, retrieved_items: List[dict], class_name: str) -> str:
     context_parts = []
     with gr.Row():
         clear_btn = gr.Button("Reset Conversation")
         instructions = gr.Markdown("Commands: `apply`, `create team`, `join team`, `recommend` — the bot will guide you step-by-step.")
     # persistent state across turns
     chat_history_state = gr.State([])