Spaces:

ChatbotNova
/

Chatbot-Backend

Sleeping

App Files Files Community

srilakshu012456 committed on Jan 6

Commit

84e70a3

verified ·

1 Parent(s): 16cbfa8

Update main.py

Browse files

Files changed (1) hide show

main.py +54 -88

main.py CHANGED Viewed

@@ -264,7 +264,7 @@ def _pick_default_action_section_with_preference(best_doc: str, prefer_action: O
                 return t
     return sections[0] if sections else None
-# ------------------------------ Action -> section selector (optional fallback by title) ------------------------------
 ACTION_SECTION_KEYS = {
     "create": ("create", "creation", "appointment creation", "new appointment", "book", "schedule"),
     "update": ("update", "updation", "reschedule", "change", "modify", "edit"),
@@ -318,7 +318,7 @@ def _find_save_lines_in_section(section_text: str, max_lines: int = 2) -> str:
                 break
     return "\n".join(lines)
-# ------------------------------ Generic boundary cutter (metadata-driven) ------------------------------
 def _build_doc_section_index(best_doc: str) -> Dict[str, Optional[str]]:
     """
     Build a dictionary for the given doc:
@@ -345,19 +345,9 @@ def _cut_at_next_boundary_generic(section_text: str, best_doc: str, current_acti
     if not (section_text or "").strip():
         return section_text
-    # Build metadata-based index: {lower(section_title): lower(action_tag or None)}
-    index: Dict[str, Optional[str]] = {}
-    for d in bm25_docs:
-        m = d.get("meta", {}) or {}
-        if m.get("filename") == best_doc and m.get("intent_tag") == "steps":
-            sec = (m.get("section") or "").strip()
-            tag = (m.get("action_tag") or "").strip().lower() or None
-            if sec:
-                index[sec.lower()] = tag
     known_headings = set(index.keys())
-    # Generic action families (no SOP-specific words)
     ACTION_FAMILIES = {
         "create": ("create", "creation", "new"),
         "update": ("update", "updation", "reschedule", "edit", "modify", "change"),
@@ -378,7 +368,6 @@ def _cut_at_next_boundary_generic(section_text: str, best_doc: str, current_acti
             return True
         if any(h in line_low for h in known_headings):
             return True
-        # Simple title-style heuristic
         if len(raw_line.strip()) <= 140:
             words = re.findall(r"[A-Za-z][A-Za-z]+", raw_line)
             cap_ratio = sum(1 for w in words if (w[0].isupper() or w.isupper())) / (len(words) or 1)
@@ -392,7 +381,7 @@ def _cut_at_next_boundary_generic(section_text: str, best_doc: str, current_acti
     for ln in lines:
         low = ln.lower().strip()
-        # 1) Metadata heading boundary (best case)
         matched_heading = None
         for h in known_headings:
             if h in low:
@@ -407,7 +396,6 @@ def _cut_at_next_boundary_generic(section_text: str, best_doc: str, current_acti
         # 2) Generic action boundary (works even if visible text != metadata title)
         fam = detect_action_family_in_line(low)
         if current_action and fam and fam != current_action:
-            # treat heading-like OR numbered lines as boundaries
             if is_heading_like(ln, low) or STEP_PREFIX_RX.match(ln):
                 break
@@ -559,78 +547,56 @@ def _format_steps_as_numbered(steps: list) -> str:
         out.append(f"{circled.get(i, str(i))} {s}")
     return "\n".join(out)
-# ------------------------------ Error lines helpers ------------------------------
-def _filter_error_lines_by_query(text: str, query: str, max_lines: int = 1) -> str:
-    def _norm(s: str) -> str:
-        s = (s or "").lower()
-        s = re.sub(r"[^\w\s]", " ", s)
-        s = re.sub(r"\s+", " ", s).strip()
-        return s
-    def _ngrams(tokens: List[str], n: int) -> List[str]:
-        return [" ".join(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]
-    def _families_for(s: str) -> set:
-        low = _norm(s)
-        fams = set()
-        for fam, syns in ERROR_FAMILY_SYNS.items():
-            if any(k in low for k in syns):
-                fams.add(fam)
-        return fams
-    q = _norm(query)
-    q_tokens = [t for t in q.split() if len(t) > 1]
-    q_bi = _ngrams(q_tokens, 2)
-    q_tri = _ngrams(q_tokens, 3)
-    q_fams = _families_for(query)
-    lines = _normalize_lines(text)
-    if not lines:
-        return (text or "").strip()
-    scored: List[Tuple[float, str]] = []
-    for ln in lines:
-        ln_norm = _norm(ln)
-        ln_fams = _families_for(ln)
-        fam_overlap = len(q_fams & ln_fams)
-        anchored = 0.0
-        first2 = " ".join(q_tokens[:2]) if len(q_tokens) >= 2 else ""
-        first3 = " ".join(q_tokens[:3]) if len(q_tokens) >= 3 else ""
-        if (first3 and ln_norm.startswith(first3)) or (first2 and ln_norm.startswith(first2)):
-            anchored = 1.0
-        bigram_hits = sum(1 for bg in q_bi if bg and bg in ln_norm)
-        trigram_hits = sum(1 for tg in q_tri if tg and tg in ln_norm)
-        token_overlap = sum(1 for t in q_tokens if t and t in ln_norm)
-        exact_phrase = 1.0 if (q and q in ln_norm) else 0.0
-        score = (
-            1.70 * fam_overlap +
-            1.00 * anchored +
-            0.80 * trigram_hits +
-            0.55 * bigram_hits +
-            0.40 * exact_phrase +
-            0.30 * token_overlap
-        )
-        if re.match(r"^\s*[-*\u2022]\s*", ln):
-            score += 0.10
-        heading = ln_norm.split(":")[0].strip()
-        if heading and (heading in q or (first2 and first2 in heading)):
-            score += 0.15
-        scored.append((score, ln))
-    scored.sort(key=lambda x: x[0], reverse=True)
-    top = [ln for s, ln in scored[:max_lines] if s > 0.0]
-    if not top:
-        top = lines[:max_lines]
-    return "\n".join(top).strip()
-def _friendly_permission_reply(raw: str) -> str:
-    line = (raw or "").strip()
-    line = re.sub(r"^\s*[-*\u2022]\s*", "", line)
-    if not line:
-        return "It looks like you may not have access for this action. Please verify your WMS role/permission with your supervisor or IT."
-    if "verify role access" in line.lower():
-        return "It looks like you may not have access for this action. Please verify your WMS role/permission with your supervisor or IT."
-    if ("permission" in line.lower()) or ("access" in line.lower()) or ("authorization" in line.lower()):
-        return f"It seems to be an access issue: {line}. Please check your role mapping or request access."
-    return line
 # ------------------------------ Language hint ------------------------------
 def _detect_language_hint(msg: str) -> Optional[str]:

                 return t
     return sections[0] if sections else None
+# ------------------------------ Optional title-based fallback ------------------------------
 ACTION_SECTION_KEYS = {
     "create": ("create", "creation", "appointment creation", "new appointment", "book", "schedule"),
     "update": ("update", "updation", "reschedule", "change", "modify", "edit"),
                 break
     return "\n".join(lines)
+# ------------------------------ Generic boundary cutter (metadata + action-family) ------------------------------
 def _build_doc_section_index(best_doc: str) -> Dict[str, Optional[str]]:
     """
     Build a dictionary for the given doc:
     if not (section_text or "").strip():
         return section_text
+    index = _build_doc_section_index(best_doc)  # {lower(section_title): action_tag}
     known_headings = set(index.keys())
     ACTION_FAMILIES = {
         "create": ("create", "creation", "new"),
         "update": ("update", "updation", "reschedule", "edit", "modify", "change"),
             return True
         if any(h in line_low for h in known_headings):
             return True
         if len(raw_line.strip()) <= 140:
             words = re.findall(r"[A-Za-z][A-Za-z]+", raw_line)
             cap_ratio = sum(1 for w in words if (w[0].isupper() or w.isupper())) / (len(words) or 1)
     for ln in lines:
         low = ln.lower().strip()
+        # 1) Metadata heading boundary
         matched_heading = None
         for h in known_headings:
             if h in low:
         # 2) Generic action boundary (works even if visible text != metadata title)
         fam = detect_action_family_in_line(low)
         if current_action and fam and fam != current_action:
             if is_heading_like(ln, low) or STEP_PREFIX_RX.match(ln):
                 break
         out.append(f"{circled.get(i, str(i))} {s}")
     return "\n".join(out)
+# ------------------------------ Context filter (ensure defined before /chat) ------------------------------
+def _filter_context_for_query(context: str, query: str) -> Tuple[str, Dict[str, Any]]:
+    """
+    Keep only the most relevant sentences from the KB context for the query.
+    Returns (filtered_text, info_dict).
+    """
+    STRICT_OVERLAP = 3
+    MAX_SENTENCES_STRICT = 4
+    MAX_SENTENCES_CONCISE = 3
+    def _norm(text: str) -> str:
+        t = (text or "").lower()
+        t = re.sub(r"[^\w\s]", " ", t)
+        t = re.sub(r"\s+", " ", t).strip()
+        return t
+    def _split_sentences(ctx: str) -> List[str]:
+        raw_sents = re.split(r"(?<=[.!?])\s+|\n+|-\s*|\*\s*", ctx or "")
+        return [s.strip() for s in raw_sents if s and len(s.strip()) > 2]
+    ctx = (context or "").strip()
+    if not ctx or not query:
+        return ctx, {'mode': 'concise', 'matched_count': 0, 'all_sentences': 0}
+    q_norm = _norm(query)
+    q_terms = [t for t in q_norm.split() if len(t) > 2]
+    if not q_terms:
+        return ctx, {'mode': 'concise', 'matched_count': 0, 'all_sentences': 0}
+    sentences = _split_sentences(ctx)
+    matched_exact, matched_any = [], []
+    for s in sentences:
+        s_norm = _norm(s)
+        is_bullet = bool(re.match(r"^[\-\*]\s*", s))
+        overlap = sum(1 for t in q_terms if t in s_norm) + (1 if is_bullet else 0)
+        if overlap >= STRICT_OVERLAP:
+            matched_exact.append(s)
+        elif overlap > 0:
+            matched_any.append(s)
+    if matched_exact:
+        kept = matched_exact[:MAX_SENTENCES_STRICT]
+        return "\n".join(kept).strip(), {'mode': 'exact', 'matched_count': len(kept), 'all_sentences': len(sentences)}
+    if matched_any:
+        kept = matched_any[:MAX_SENTENCES_CONCISE]
+        return "\n".join(kept).strip(), {'mode': 'concise', 'matched_count': len(kept), 'all_sentences': len(sentences)}
+    kept = sentences[:MAX_SENTENCES_CONCISE]
+    return "\n".join(kept).strip(), {'mode': 'concise', 'matched_count': 0, 'all_sentences': len(sentences)}
 # ------------------------------ Language hint ------------------------------
 def _detect_language_hint(msg: str) -> Optional[str]: