srilakshu012456 committed on
Commit
fe3eff0
·
verified ·
1 Parent(s): 9982f90

Update services/kb_creation.py

Browse files
Files changed (1) hide show
  1. services/kb_creation.py +29 -8
services/kb_creation.py CHANGED
@@ -227,14 +227,12 @@ def bm25_search(query: str, top_k: int = 50) -> List[Tuple[int, float]]:
227
  q_terms = _tokenize(norm)
228
  if not q_terms:
229
  return []
230
-
231
  candidates = set()
232
  for t in q_terms:
233
  for idx in bm25_inverted.get(t, []):
234
  candidates.add(idx)
235
  if not candidates:
236
  candidates = set(range(len(bm25_docs)))
237
-
238
  scored = []
239
  for idx in candidates:
240
  s = _bm25_score_for_doc(q_terms, idx)
@@ -285,7 +283,7 @@ ACTION_SYNONYMS = {
285
  "update": ["update", "modify", "change", "edit"],
286
  "delete": ["delete", "remove"],
287
  "navigate": ["navigate", "go to", "open"],
288
- "perform": ["perform", "execute", "do"],
289
  }
290
 
291
  def _detect_user_intent(query: str) -> str:
@@ -338,7 +336,7 @@ def _action_weight(text: str, actions: List[str]) -> float:
338
  for syn in ACTION_SYNONYMS.get(act, [act]):
339
  if syn in t:
340
  score += 1.0
341
- conflicts = {"create": ["delete"], "delete": ["create"], "update": ["delete"], "navigate": [], "perform": []}
342
  for act in actions:
343
  for bad in conflicts.get(act, []):
344
  for syn in ACTION_SYNONYMS.get(bad, [bad]):
@@ -381,9 +379,9 @@ def hybrid_search_knowledge_base(query: str, top_k: int = 10, alpha: float = 0.6
381
 
382
  union_ids = set(sem_ids) | set(bm25_id_to_norm.keys())
383
 
384
- gamma = 0.25
385
- delta = 0.35
386
- epsilon = 0.30
387
 
388
  combined_records_ext: List[Tuple[str, float, float, str, Dict[str, Any], float, float, float]] = []
389
  for cid in union_ids:
@@ -463,7 +461,30 @@ def hybrid_search_knowledge_base(query: str, top_k: int = 10, alpha: float = 0.6
463
  "actions": actions,
464
  }
465
 
466
- # --- Runtime info & reset helpers (optional admin) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
467
  def get_kb_runtime_info() -> Dict[str, Any]:
468
  return {
469
  "chroma_path": CHROMA_PATH,
 
227
  q_terms = _tokenize(norm)
228
  if not q_terms:
229
  return []
 
230
  candidates = set()
231
  for t in q_terms:
232
  for idx in bm25_inverted.get(t, []):
233
  candidates.add(idx)
234
  if not candidates:
235
  candidates = set(range(len(bm25_docs)))
 
236
  scored = []
237
  for idx in candidates:
238
  s = _bm25_score_for_doc(q_terms, idx)
 
283
  "update": ["update", "modify", "change", "edit"],
284
  "delete": ["delete", "remove"],
285
  "navigate": ["navigate", "go to", "open"],
286
+ # NOTE: 'perform' REMOVED to avoid wrong boosts like Appointment "performed..."
287
  }
288
 
289
  def _detect_user_intent(query: str) -> str:
 
336
  for syn in ACTION_SYNONYMS.get(act, [act]):
337
  if syn in t:
338
  score += 1.0
339
+ conflicts = {"create": ["delete"], "delete": ["create"], "update": ["delete"], "navigate": []}
340
  for act in actions:
341
  for bad in conflicts.get(act, []):
342
  for syn in ACTION_SYNONYMS.get(bad, [bad]):
 
379
 
380
  union_ids = set(sem_ids) | set(bm25_id_to_norm.keys())
381
 
382
+ gamma = 0.25 # meta overlap
383
+ delta = 0.35 # intent boost
384
+ epsilon = 0.30 # action weight
385
 
386
  combined_records_ext: List[Tuple[str, float, float, str, Dict[str, Any], float, float, float]] = []
387
  for cid in union_ids:
 
461
  "actions": actions,
462
  }
463
 
464
+ # --------------------------- Section fetch helpers (for full output) ---------------------------
465
def get_section_text(filename: str, section: str) -> str:
    """Concatenate all chunk texts for a given filename+section.

    Scans the module-level ``bm25_docs`` list and collects the ``"text"``
    of every entry whose ``"meta"`` mapping has a matching ``"filename"``
    and ``"section"``.

    Args:
        filename: Value compared against ``meta["filename"]``.
        section: Value compared against ``meta["section"]``.

    Returns:
        The stripped, non-empty chunk texts joined by a blank line, in the
        order they appear in ``bm25_docs``; ``""`` when nothing matches.
    """
    pieces = []
    for doc in bm25_docs:
        # `or {}` also covers an explicit None stored under "meta", which
        # a plain .get("meta", {}) default would let through.
        meta = doc.get("meta") or {}
        if meta.get("filename") != filename or meta.get("section") != section:
            continue
        chunk = (doc.get("text") or "").strip()
        if chunk:
            pieces.append(chunk)
    # Every piece is already stripped and non-empty, so the joined result
    # has no leading/trailing whitespace to trim.
    return "\n\n".join(pieces)
475
+
476
def get_best_steps_section_text(filename: str) -> str:
    """Return combined text of all 'steps' chunks in the given SOP (filename).

    Scans the module-level ``bm25_docs`` list and collects the ``"text"``
    of every entry whose ``"meta"`` mapping has a matching ``"filename"``
    and an ``"intent_tag"`` equal to ``"steps"``.

    Args:
        filename: Value compared against ``meta["filename"]``.

    Returns:
        The stripped, non-empty chunk texts joined by a blank line, in the
        order they appear in ``bm25_docs``; ``""`` when nothing matches.
    """
    pieces = []
    for doc in bm25_docs:
        # `or {}` also covers an explicit None stored under "meta", which
        # a plain .get("meta", {}) default would let through.
        meta = doc.get("meta") or {}
        if meta.get("filename") != filename or meta.get("intent_tag") != "steps":
            continue
        chunk = (doc.get("text") or "").strip()
        if chunk:
            pieces.append(chunk)
    # Every piece is already stripped and non-empty, so the joined result
    # has no leading/trailing whitespace to trim.
    return "\n\n".join(pieces)
486
+
487
+ # --- Admin helpers (optional; unchanged) ---
488
  def get_kb_runtime_info() -> Dict[str, Any]:
489
  return {
490
  "chroma_path": CHROMA_PATH,