Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -161,24 +161,138 @@ def _ensure_numbering(text: str) -> str:
|
|
| 161 |
out.append(f"{marker} {seg}")
|
| 162 |
return "\n".join(out)
|
| 163 |
|
|
|
|
| 164 |
def _filter_error_lines_by_query(text: str, query: str, max_lines: int = 4) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
def _norm(s: str) -> str:
|
| 166 |
s = (s or "").lower()
|
| 167 |
s = re.sub(r"[^\w\s]", " ", s)
|
| 168 |
s = re.sub(r"\s+", " ", s).strip()
|
| 169 |
return s
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
q = _norm(query)
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
ln_norm = _norm(ln)
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
def _friendly_permission_reply(raw: str) -> str:
|
| 184 |
line = (raw or "").strip()
|
|
|
|
| 161 |
out.append(f"{marker} {seg}")
|
| 162 |
return "\n".join(out)
|
| 163 |
|
| 164 |
+
|
| 165 |
def _filter_error_lines_by_query(text: str, query: str, max_lines: int = 4) -> str:
    """
    Pick the most relevant 'Common Errors & Resolution' bullets for the user's message.
    Generic across SOPs via error families + phrase overlap.

    Prioritization:
      1) error-family match (NOT_FOUND/MISMATCH/LOCKED/PERMISSION/TIMEOUT/SYNC),
      2) anchored starts (line begins with the error phrase/heading),
      3) multi-word overlap (bigrams/trigrams),
      4) token overlap,
      5) bullet/heading formatting bonus.

    If no line matches positively, falls back to the first few lines.

    Args:
        text: Raw SOP error-section text; split into candidate lines via
            `_normalize_lines` (defined elsewhere in this module).
        query: The user's message used to rank candidate lines.
        max_lines: Maximum number of lines returned (default 4).

    Returns:
        Newline-joined top-scoring lines, or the stripped original text when
        `_normalize_lines` yields nothing.
    """
    import re
    from typing import List, Tuple

    # --- Generic error families (SOP-wide) ---
    # Each family maps to a tuple of synonym phrases; a string belongs to a
    # family when any synonym is a substring of its normalized form.
    ERROR_FAMILIES = {
        "NOT_FOUND": (
            "not found", "missing", "does not exist", "doesn't exist",
            "unavailable", "not available", "cannot find", "no such", "not present", "absent"
        ),
        "MISMATCH": (
            "mismatch", "doesn't match", "does not match", "variance",
            "difference", "discrepancy", "not equal"
        ),
        "LOCKED": (
            "locked", "status locked", "blocked", "read only", "read-only", "frozen", "freeze"
        ),
        "PERMISSION": (
            "permission", "permissions", "access denied", "not authorized",
            "not authorised", "insufficient privileges", "no access", "authorization", "authorisation"
        ),
        "TIMEOUT": (
            "timeout", "timed out", "network", "connection", "unable to connect",
            "disconnected", "no network"
        ),
        "SYNC": (
            "sync", "synchronization", "synchronisation", "replication",
            "refresh", "out of sync", "stale", "delay", "lag"
        ),
    }

    # Normalizer: lowercase, strip punctuation, collapse whitespace.
    # NOTE(review): intentionally shadows any module-level _norm so this
    # function stays self-contained.
    def _norm(s: str) -> str:
        s = (s or "").lower()
        s = re.sub(r"[^\w\s]", " ", s)
        s = re.sub(r"\s+", " ", s).strip()
        return s

    # Detect error families mentioned in a string.
    def _families_for(s: str) -> List[str]:
        out = []
        low = _norm(s)
        for fam, syns in ERROR_FAMILIES.items():
            if any(k in low for k in syns):
                out.append(fam)
        return out

    # Contiguous n-grams over a token list.
    def _ngrams(tokens: List[str], n: int) -> List[str]:
        return [" ".join(tokens[i:i+n]) for i in range(len(tokens) - n + 1)]

    # Normalize query once; single-character tokens carry no signal.
    q = _norm(query)
    q_tokens = [t for t in q.split() if len(t) > 1]
    q_bi = _ngrams(q_tokens, 2)
    q_tri = _ngrams(q_tokens, 3)
    q_families = set(_families_for(query))

    # Loop invariants (fix: previously recomputed per candidate line):
    # leading 2-/3-token prefixes of the query, used for anchored starts.
    first2 = " ".join(q_tokens[:2]) if len(q_tokens) >= 2 else ""
    first3 = " ".join(q_tokens[:3]) if len(q_tokens) >= 3 else ""

    # Candidate lines
    lines = _normalize_lines(text)
    if not lines:
        return (text or "").strip()

    scored: List[Tuple[float, str]] = []
    for ln in lines:
        ln_norm = _norm(ln)
        ln_families = set(_families_for(ln))

        # --- Signals ---
        # Family match (strong): any overlap between query families and line families
        fam_overlap = len(q_families & ln_families)
        fam_score = 1.60 * fam_overlap  # strong boost when families line up

        # Exact phrase (medium-strong)
        exact_phrase = 1.00 if (q and q in ln_norm) else 0.0

        # Anchored start (strong for bullet headings like "ASN not found: ...")
        anchored = 1.00 if (first3 and ln_norm.startswith(first3)) or (first2 and ln_norm.startswith(first2)) else 0.0

        # Multi-word phrase overlap
        bigram_hits = sum(1 for bg in q_bi if bg and bg in ln_norm)
        trigram_hits = sum(1 for tg in q_tri if tg and tg in ln_norm)

        # Token overlap (fallback)
        token_overlap = sum(1 for t in q_tokens if t and t in ln_norm)

        # --- Score composition (tuned for generic SOPs) ---
        score = (
            fam_score +
            0.90 * anchored +
            0.80 * trigram_hits +
            0.55 * bigram_hits +
            0.45 * exact_phrase +
            0.30 * token_overlap
        )

        # Small bonuses for bullets/heading-like lines
        if re.match(r"^\s*[\-\*\u2022]\s*", ln):  # bullet dot
            score += 0.10
        # Heading before ':' matches some part of the query
        heading = ln_norm.split(":")[0].strip()
        if heading and (heading in q or (first2 and first2 in heading)):
            score += 0.15

        scored.append((score, ln))

    # Sort by score desc and take top max_lines with positive score.
    scored.sort(key=lambda x: x[0], reverse=True)
    top = [ln for s, ln in scored[:max_lines] if s > 0.0]

    # Fallback if everything scored zero
    if not top:
        top = lines[:max_lines]

    return "\n".join(top).strip()
|
| 296 |
|
| 297 |
def _friendly_permission_reply(raw: str) -> str:
|
| 298 |
line = (raw or "").strip()
|