Spaces:

ChatbotNova
/

Chatbot-Backend

Sleeping

App Files Files Community

srilakshu012456 committed on Dec 23, 2025

Commit

fe3eff0

verified ·

1 Parent(s): 9982f90

Update services/kb_creation.py

Browse files

Files changed (1) hide show

services/kb_creation.py +29 -8

services/kb_creation.py CHANGED Viewed

@@ -227,14 +227,12 @@ def bm25_search(query: str, top_k: int = 50) -> List[Tuple[int, float]]:
     q_terms = _tokenize(norm)
     if not q_terms:
         return []
     candidates = set()
     for t in q_terms:
         for idx in bm25_inverted.get(t, []):
             candidates.add(idx)
     if not candidates:
         candidates = set(range(len(bm25_docs)))
     scored = []
     for idx in candidates:
         s = _bm25_score_for_doc(q_terms, idx)
@@ -285,7 +283,7 @@ ACTION_SYNONYMS = {
     "update": ["update", "modify", "change", "edit"],
     "delete": ["delete", "remove"],
     "navigate": ["navigate", "go to", "open"],
-    "perform": ["perform", "execute", "do"],
 }
 def _detect_user_intent(query: str) -> str:
@@ -338,7 +336,7 @@ def _action_weight(text: str, actions: List[str]) -> float:
         for syn in ACTION_SYNONYMS.get(act, [act]):
             if syn in t:
                 score += 1.0
-    conflicts = {"create": ["delete"], "delete": ["create"], "update": ["delete"], "navigate": [], "perform": []}
     for act in actions:
         for bad in conflicts.get(act, []):
             for syn in ACTION_SYNONYMS.get(bad, [bad]):
@@ -381,9 +379,9 @@ def hybrid_search_knowledge_base(query: str, top_k: int = 10, alpha: float = 0.6
     union_ids = set(sem_ids) | set(bm25_id_to_norm.keys())
-    gamma = 0.25
-    delta = 0.35
-    epsilon = 0.30
     combined_records_ext: List[Tuple[str, float, float, str, Dict[str, Any], float, float, float]] = []
     for cid in union_ids:
@@ -463,7 +461,30 @@ def hybrid_search_knowledge_base(query: str, top_k: int = 10, alpha: float = 0.6
         "actions": actions,
     }
-# --- Runtime info & reset helpers (optional admin) ---
 def get_kb_runtime_info() -> Dict[str, Any]:
     return {
         "chroma_path": CHROMA_PATH,

     q_terms = _tokenize(norm)
     if not q_terms:
         return []
     candidates = set()
     for t in q_terms:
         for idx in bm25_inverted.get(t, []):
             candidates.add(idx)
     if not candidates:
         candidates = set(range(len(bm25_docs)))
     scored = []
     for idx in candidates:
         s = _bm25_score_for_doc(q_terms, idx)
     "update": ["update", "modify", "change", "edit"],
     "delete": ["delete", "remove"],
     "navigate": ["navigate", "go to", "open"],
+    # NOTE: 'perform' REMOVED to avoid wrong boosts like Appointment "performed..."
 }
 def _detect_user_intent(query: str) -> str:
         for syn in ACTION_SYNONYMS.get(act, [act]):
             if syn in t:
                 score += 1.0
+    conflicts = {"create": ["delete"], "delete": ["create"], "update": ["delete"], "navigate": []}
     for act in actions:
         for bad in conflicts.get(act, []):
             for syn in ACTION_SYNONYMS.get(bad, [bad]):
     union_ids = set(sem_ids) | set(bm25_id_to_norm.keys())
+    gamma = 0.25  # meta overlap
+    delta = 0.35  # intent boost
+    epsilon = 0.30  # action weight
     combined_records_ext: List[Tuple[str, float, float, str, Dict[str, Any], float, float, float]] = []
     for cid in union_ids:
         "actions": actions,
     }
+# --------------------------- Section fetch helpers (for full output) ---------------------------
+def get_section_text(filename: str, section: str) -> str:
+    """Concatenate all chunk texts for a given filename+section."""
+    # Scans every entry of the module-level `bm25_docs` collection (defined
+    # outside this hunk) and keeps the chunks whose metadata matches both
+    # `filename` and `section` by exact equality.
+    # Returns the matching chunk texts joined by a blank line, in the order
+    # they appear in `bm25_docs`; returns "" when nothing matches.
+    texts: List[str] = []
+    for d in bm25_docs:
+        # NOTE(review): the `{}` default only applies when the "meta" key is
+        # absent; a present-but-None "meta" would make `m.get` raise
+        # AttributeError. Confirm "meta" is always a dict when present.
+        m = d.get("meta", {})
+        if m.get("filename") == filename and m.get("section") == section:
+            # `or ""` tolerates a missing or None "text" value.
+            t = (d.get("text") or "").strip()
+            # Skip chunks that are empty after stripping whitespace.
+            if t:
+                texts.append(t)
+    return "\n\n".join(texts).strip()
+def get_best_steps_section_text(filename: str) -> str:
+    """Return combined text of all 'steps' sections in the given SOP (filename)."""
+    # Despite "best" in the name, no ranking happens here: every chunk in the
+    # module-level `bm25_docs` collection (defined outside this hunk) whose
+    # metadata has this `filename` and an "intent_tag" equal to "steps" is
+    # included, in `bm25_docs` order.
+    # Returns the texts joined by a blank line; "" when nothing matches.
+    texts: List[str] = []
+    for d in bm25_docs:
+        # NOTE(review): the `{}` default only applies when the "meta" key is
+        # absent; a present-but-None "meta" would make `m.get` raise
+        # AttributeError. Confirm "meta" is always a dict when present.
+        m = d.get("meta", {})
+        if m.get("filename") == filename and (m.get("intent_tag") == "steps"):
+            # `or ""` tolerates a missing or None "text" value.
+            t = (d.get("text") or "").strip()
+            # Skip chunks that are empty after stripping whitespace.
+            if t:
+                texts.append(t)
+    return "\n\n".join(texts).strip()
+# --- Admin helpers (optional; unchanged) ---
 def get_kb_runtime_info() -> Dict[str, Any]:
     return {
         "chroma_path": CHROMA_PATH,