afouda committed on
Commit
18a171e
·
verified ·
1 Parent(s): 091d35b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -19
app.py CHANGED
@@ -66,7 +66,8 @@ _SKILL_REGEX = re.compile(r"\b(Natural Language Processing|Building Information
66
 
67
  def extract_skills_from_text(cv_text: str) -> List[str]:
68
  skills = list({m.group(0).lower() for m in _SKILL_REGEX.finditer(cv_text)})
69
- return [s.capitalize() for s in skills]
 
70
 
71
  # --- Process uploaded file (PDF, DOCX, TXT) ---
72
  def process_uploaded_file(file_obj: Any) -> dict | None:
@@ -222,33 +223,110 @@ ensure_collections()
222
 
223
  # -------------------- Query Weaviate --------------------
224
  def query_weaviate_collection(class_name: str, query_text: str, limit: int = 5) -> List[dict]:
 
 
 
 
225
  try:
226
  collection = weaviate_client.collections.get(class_name)
227
 
228
- # BM25 keyword search
229
- response = collection.query.bm25(query=query_text, limit=limit)
 
 
 
 
 
 
230
  items = [obj.properties for obj in response.objects]
231
-
232
- # fallback filter if nothing found
233
- if not items:
234
- filters = Filter.any_of([
235
- Filter.by_property("title").like(f"*{query_text}*"),
236
- Filter.by_property("description").like(f"*{query_text}*")
237
- ])
238
- if class_name != "Team":
239
- filters = Filter.any_of([
240
- Filter.by_property("title").like(f"*{query_text}*"),
241
- Filter.by_property("skills").like(f"*{query_text}*"),
242
- Filter.by_property("description").like(f"*{query_text}*")
243
- ])
244
- response_fallback = collection.query.fetch_objects(limit=limit, filters=filters)
245
- items = [obj.properties for obj in response_fallback.objects]
246
-
247
  return items
 
248
  except Exception as e:
249
  print(f"[Weaviate Query Error] {e}")
250
  return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  # -------------------- RAG Prompt Builder --------------------
253
  def build_rag_prompt(user_question: str, retrieved_items: List[dict], class_name: str) -> str:
254
  context_parts = []
@@ -811,6 +889,7 @@ with gr.Blocks(css="""
811
  with gr.Row():
812
  clear_btn = gr.Button("Reset Conversation")
813
  instructions = gr.Markdown("Commands: `apply`, `create team`, `join team`, `recommend` — the bot will guide you step-by-step.")
 
814
 
815
  # persistent state across turns
816
  chat_history_state = gr.State([])
 
66
 
67
def extract_skills_from_text(cv_text: str) -> List[str]:
    """Extract the known skills mentioned in *cv_text*.

    Matches against the precompiled ``_SKILL_REGEX`` whitelist, deduplicates
    case-insensitively (via a set of lowercased matches), and returns each
    skill capitalized.  Order is not guaranteed (set iteration order).

    Fix: the diff dropped the closing ``]`` of the return-line list
    comprehension, which is a syntax error; restored here.
    """
    skills = list({m.group(0).lower() for m in _SKILL_REGEX.finditer(cv_text)})
    return [s.capitalize() for s in skills]
71
 
72
  # --- Process uploaded file (PDF, DOCX, TXT) ---
73
  def process_uploaded_file(file_obj: Any) -> dict | None:
 
223
 
224
  # -------------------- Query Weaviate --------------------
225
def query_weaviate_collection(class_name: str, query_text: str, limit: int = 5) -> List[dict]:
    """Run a hybrid (vector + BM25 keyword) search over one Weaviate collection.

    Hybrid search gives more relevant hits for conversational queries than
    pure keyword search.  For the ``Job`` collection the title field is
    boosted (``title^2``); other collections use the client's default
    property weighting (``query_properties=None``).

    Returns a list of property dicts, or ``[]`` on any query error
    (best-effort: the error is printed, never raised to the caller).
    """
    try:
        collection = weaviate_client.collections.get(class_name)

        # Boost title matches for job searches; None lets Weaviate pick defaults.
        search_props = ["title^2", "description", "skills"] if class_name == "Job" else None

        response = collection.query.hybrid(
            query=query_text,
            limit=limit,
            query_properties=search_props,
        )
        return [obj.properties for obj in response.objects]
    except Exception as exc:
        print(f"[Weaviate Query Error] {exc}")
        return []
247
+ # -------------------- NEW: Search All Collections --------------------
248
+ # -------------------- RAG Answer (Modified for Multi-Class Search) --------------------
249
def rag_answer_all(user_question: str, top_k: int = 3) -> tuple[str, list[dict]]:
    """Answer *user_question* with RAG across all collections (Jobs/Projects/Opportunities).

    Fix: the original annotation ``-> (str, list[dict])`` is not a valid type
    hint — it evaluates to a tuple *instance* at definition time; the correct
    spelling is ``tuple[str, list[dict]]`` (the file already uses 3.10+ syntax).

    Parameters:
        user_question: raw user query text.
        top_k: maximum results retrieved per collection.

    Returns:
        ``(answer, retrieved_items)`` — the LLM-generated answer string and
        the raw retrieved records; each record is a dict with at least the
        keys ``"class_name"`` and ``"properties"`` (shape set by
        ``search_all_collections`` — defined elsewhere in this file).
    """
    # Step 1: Search across all relevant collections
    retrieved_items = search_all_collections(user_question, limit_per_class=top_k)

    if not retrieved_items:
        return f"Sorry, I couldn't find any results related to '{user_question}' in our Jobs, Projects, or Opportunities databases.", []

    # Step 2: Build a prompt that groups records by source collection so the
    # model can label each result with its category.
    grouped_results: dict[str, list[dict]] = {}
    for item in retrieved_items:
        grouped_results.setdefault(item["class_name"], []).append(item["properties"])

    context_parts = []
    for class_name, items in grouped_results.items():
        context_parts.append(f"\n--- Results from '{class_name}' collection ---")
        for i, properties in enumerate(items, 1):
            # Stringify values so json.dumps never chokes on non-serializable types.
            details = {k: str(v) for k, v in properties.items()}
            item_str = f"Record {i}:\n{json.dumps(details, indent=2, ensure_ascii=False)}"
            context_parts.append(item_str)

    context_block = "\n".join(context_parts)

    prompt = f"""
User Question: "{user_question}"

You are an expert AI assistant. Your mission is to analyze structured data from different categories (Jobs, Projects, Opportunities) and present a comprehensive, clear summary to the user.

**Primary Directive:** Your ONLY source of information is the structured JSON data provided below under "Retrieved Data". If the data section is empty, state that no results were found.

**Your Core Instructions:**
1. **Acknowledge the Categories:** Analyze all the data provided from each collection (`Job`, `Project`, `Opportunities`).
2. **Summarize Logically:** For each result, **you must clearly state which category it belongs to**. For example, start with "I found a **Job** opportunity:" or "Here is a **Project** you might be interested in:".
3. **Present All Details:** Convert the data for each item into natural, readable language, covering all important details like title, company/creator, description, and skills.
4. **Use Clear Formatting:** Use Markdown headings (e.g., `### Job: [Title]`) and bullet points to make the response easy to read.

Retrieved Data:
{context_block}
"""

    # Step 3: Call the LLM to get the final answer; low temperature keeps the
    # summary grounded in the retrieved records.
    try:
        resp = llm_client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT_BASE},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            max_tokens=4096
        )
        answer = resp.choices[0].message.content or ""
    except Exception as e:
        print(f"[RAG LLM Error] {e}")
        answer = "⚠️ Sorry, I couldn't process that. Try again later."

    return answer, retrieved_items
309
+
310
+ # ========== IDLE STATE ==========
311
+ if st == "idle":
312
+ low = text.lower()
313
+
314
+ # ... (The first parts for greetings and flow starters remain the same)
315
+ # ... (e.g., if any(k in low for k in ["apply",...])
316
+ # ... (e.g., if any(k in low for k in ["team",...])
317
+ # ... (e.g., if any(k in low for k in ["recommend",...])
318
+
319
+ # 3) Check for specific Knowledge Base intents
320
+ intent = route_intent(text)
321
+ if intent and intent.startswith("kb_"):
322
+ kb_ans = kb_fallback(intent)
323
+ if kb_ans:
324
+ return kb_ans, session, False
325
+
326
+ # 4) If it's not a command or KB question, perform a global RAG search
327
+ # This is now the default action for any general query.
328
+ rag_ans, _ = rag_answer_all(text)
329
+ return rag_ans, session, False
330
  # -------------------- RAG Prompt Builder --------------------
331
  def build_rag_prompt(user_question: str, retrieved_items: List[dict], class_name: str) -> str:
332
  context_parts = []
 
889
  with gr.Row():
890
  clear_btn = gr.Button("Reset Conversation")
891
  instructions = gr.Markdown("Commands: `apply`, `create team`, `join team`, `recommend` — the bot will guide you step-by-step.")
892
+
893
 
894
  # persistent state across turns
895
  chat_history_state = gr.State([])