Spaces:

ChatbotNova
/

Chatbot-Backend

Sleeping

App Files Files Community

srilakshu012456 committed on Jan 11

Commit

2ef7e03

verified ·

1 Parent(s): 0384218

Update main.py

Browse files

Files changed (1) hide show

main.py +101 -0

main.py CHANGED Viewed

@@ -225,6 +225,107 @@ def _filter_numbered_steps_by_actions(numbered_text: str,
     # If over-filtering made it empty, fall back to original text
     return "\n".join(out_lines).strip() or (numbered_text or "").strip()
 def _ensure_numbering(text: str) -> str:
     """
     Normalize raw SOP steps into a clean numbered list using circled digits.

     # If over-filtering made it empty, fall back to original text
     return "\n".join(out_lines).strip() or (numbered_text or "").strip()
+# --- NEW: extract the anchor clause from user message ---
+def _extract_anchor_from_query(msg: str) -> dict:
+    """
+    Split user message into:
+      - 'anchor': the clause we should match against SOP steps.
+      - 'has_followup': True when user is clearly asking what comes next (what next, what to do, then).
+    Works across any SOP text; no domain words required.
+    """
+    raw = (msg or "").strip()
+    low = _norm_text(raw)
+    # very small set of generic follow-up cues (no domain synonyms)
+    FOLLOWUP_CUES = ("what next", "what is next", "what to do", "then", "after that", "next")
+    has_followup = any(cue in low for cue in FOLLOWUP_CUES)
+    # Split by common separators to isolate the anchor clause
+    parts = [p.strip() for p in re.split(r"[?.,;:\-\n]+", raw) if p.strip()]
+    if not parts:
+        return {"anchor": raw, "has_followup": has_followup}
+    # If a follow-up cue is in the last part, prefer the preceding part as the anchor
+    last = parts[-1]
+    last_low = _norm_text(last)
+    if any(cue in last_low for cue in FOLLOWUP_CUES) and len(parts) >= 2:
+        anchor = parts[-2]
+    else:
+        anchor = parts[0] if len(parts) == 1 else parts[-1]  # favor the end if user writes "... , then what next"
+    return {"anchor": anchor.strip(), "has_followup": has_followup}
+# --- Core: keyword-free, anchor-based "next steps" ---
+def _anchor_next_steps(user_message: str, numbered_text: str, max_next: int = 8) -> list | None:
+    """
+    Locate the step line (or sentence inside it) that best matches the user's anchor clause,
+    then return ONLY subsequent steps (renumbered by caller). Returns None if no strong anchor is found.
+    """
+    steps = _split_sop_into_steps(numbered_text)
+    if not steps:
+        return None
+    info = _extract_anchor_from_query(user_message)
+    anchor = info.get("anchor", "").strip()
+    if not anchor:
+        return None
+    anchor_norm = _norm_text(anchor)
+    has_followup = bool(info.get("has_followup"))
+    best_idx, best_score, best_literal = -1, -1.0, False
+    candidate_indices = []
+    for idx, step_line in enumerate(steps):
+        # Score on full line
+        s_full = _similarity(anchor, step_line)
+        literal_hit = False
+        sent_scores = [s_full]
+        # Also score each sentence within this step (to match middle sentences)
+        for s in _split_sentences(step_line):
+            sent_scores.append(_similarity(anchor, s))
+            # literal containment (punctuation-insensitive)
+            a_flat = re.sub(r"\W+", "", anchor_norm)
+            s_flat = re.sub(r"\W+", "", _norm_text(s))
+            if a_flat and (a_flat in s_flat or s_flat in a_flat):
+                literal_hit = True
+        score = max(sent_scores)
+        candidate_indices.append((idx, score, literal_hit))
+    # Choose best; for near ties prefer later index (progress assumption)
+    candidate_indices.sort(key=lambda t: (t[1], t[0]), reverse=True)
+    best_idx, best_score, best_literal = candidate_indices[0]
+    # Dynamic threshold:
+    # - literal containment → accept
+    # - follow-up cue → lower threshold (e.g., 0.50)
+    # - otherwise require a modest similarity
+    tok_count = len([t for t in anchor_norm.split() if len(t) > 1])
+    if best_literal:
+        accept = True
+    else:
+        base_ok = best_score >= (0.55 if not has_followup else 0.50)
+        len_ok = (best_score >= 0.40) and (tok_count >= 3)
+        accept = base_ok or len_ok
+    if not accept:
+        return None
+    # Start from the step AFTER the matched one
+    start = best_idx + 1
+    if start >= len(steps):
+        return []  # already at final step
+    end = min(start + max_next, len(steps))
+    next_steps = steps[start:end]
+    # Dedupe just in case adjacent chunks contain repeated lines
+    # (use the same numbering by caller)
+    return [ln for ln in _dedupe_lines("\n".join(next_steps)).splitlines() if ln.strip()]
 def _ensure_numbering(text: str) -> str:
     """
     Normalize raw SOP steps into a clean numbered list using circled digits.