Spaces:

ChatbotNova
/

Chatbot-Backend

Sleeping

App Files Files Community

srilakshu012456 committed on Dec 23, 2025

Commit

aa65b5b

verified ·

1 Parent(s): 60c3916

Update main.py

Browse files

Files changed (1) hide show

main.py +61 -39

main.py CHANGED Viewed

@@ -10,12 +10,12 @@ from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from dotenv import load_dotenv
 from datetime import datetime
-#new
 # KB services (Chroma + sentence-transformers + BM25 hybrid)
 from services.kb_creation import (
     collection,
     ingest_documents,
-    hybrid_search_knowledge_base,  # new
 )
 # Optional routers/utilities you already have
@@ -68,22 +68,18 @@ class ChatInput(BaseModel):
     prev_status: Optional[str] = None  # "NO_KB_MATCH" | "PARTIAL" | "OK" | None
     last_issue: Optional[str] = None
 class IncidentInput(BaseModel):
     # Request body accepted by the POST /incident endpoint (raise_incident).
     short_description: str  # required
     description: str  # required
     # Optional flag, defaults to False; presumably asks the endpoint to also mark
     # the newly created incident as resolved — confirm in raise_incident.
     mark_resolved: Optional[bool] = False
 class TicketDescInput(BaseModel):
     # Request body accepted by the POST /generate_ticket_desc endpoint.
     # `issue` is required; presumably the user's issue text — confirm in generate_ticket_desc_ep.
     issue: str
 class TicketStatusInput(BaseModel):
     # Request body accepted by the POST /incident_status endpoint. Both fields are optional.
     # NOTE(review): presumably at least one of them must be supplied — confirm in incident_status.
     sys_id: Optional[str] = None  # presumably the ServiceNow record sys_id — confirm
     number: Optional[str] = None  # IncidentID (incident number)
 # ✅ Human‑readable mapping for ServiceNow incident state codes
 STATE_MAP = {
     "1": "New",
@@ -169,7 +165,6 @@ def extract_kb_context(kb_results: Optional[Dict[str, Any]], top_chunks: int = 2
         "best_combined": best_combined,
     }
 def _strip_any_source_lines(text: str) -> str:
     lines = text.splitlines()
     kept = []
@@ -179,7 +174,6 @@ def _strip_any_source_lines(text: str) -> str:
         kept.append(ln)
     return "\n".join(kept).strip()
 def _build_clarifying_message() -> str:
     return (
         "I couldn’t find matching content in the KB yet. To help me narrow it down, please share:\n\n"
@@ -193,10 +187,6 @@ def _build_clarifying_message() -> str:
 # ---------- Intent helpers ----------
 def _build_tracking_descriptions(issue_text: str, resolved_text: str) -> tuple[str, str]:
-    """
-    Short: first 100 chars of the ORIGINAL issue text (preferred).
-    Long: clear sentence that includes both original issue and resolved ack.
-    """
     issue = (issue_text or "").strip()
     resolved = (resolved_text or "").strip()
     short_desc = issue[:100] if issue else (resolved[:100] or "Issue resolved (user confirmation)")
@@ -207,7 +197,6 @@ def _build_tracking_descriptions(issue_text: str, resolved_text: str) -> tuple[s
     ).strip()
     return short_desc, long_desc
 def _is_incident_intent(msg_norm: str) -> bool:
     intent_phrases = [
         "create ticket", "create a ticket", "raise ticket", "raise a ticket", "open ticket", "open a ticket",
@@ -217,7 +206,6 @@ def _is_incident_intent(msg_norm: str) -> bool:
     ]
     return any(p in msg_norm for p in intent_phrases)
 def _is_feedback_message(msg_norm: str) -> bool:
     feedback_phrases = [
         "issue not resolved", "not resolved", "still not working",
@@ -226,7 +214,6 @@ def _is_feedback_message(msg_norm: str) -> bool:
     ]
     return any(p in msg_norm for p in feedback_phrases)
 def _parse_ticket_status_intent(msg_norm: str) -> Dict[str, Optional[str]]:
     status_keywords = [
         "status", "ticket status", "incident status",
@@ -246,7 +233,6 @@ def _parse_ticket_status_intent(msg_norm: str) -> Dict[str, Optional[str]]:
                 return {"number": val.upper() if val.lower().startswith("inc") else val}
     return {"number": None, "ask_number": True}
 def _is_resolution_ack_heuristic(msg_norm: str) -> bool:
     phrases = [
         "it is resolved", "resolved", "issue resolved", "problem resolved",
@@ -255,7 +241,6 @@ def _is_resolution_ack_heuristic(msg_norm: str) -> bool:
     ]
     return any(p in msg_norm for p in phrases)
 def _has_negation_resolved(msg_norm: str) -> bool:
     neg_phrases = [
         "not resolved", "issue not resolved", "still not working", "not working",
@@ -263,7 +248,6 @@ def _has_negation_resolved(msg_norm: str) -> bool:
     ]
     return any(p in msg_norm for p in neg_phrases)
 def _classify_resolution_llm(user_message: str) -> bool:
     if not GEMINI_API_KEY:
         return False
@@ -289,12 +273,7 @@ def _classify_resolution_llm(user_message: str) -> bool:
     except Exception:
         return False
 def _is_generic_issue(msg_norm: str) -> bool:
-    """
-    Returns True for very generic/open-ended issue statements that
-    shouldn’t trigger a KB search yet.
-    """
     generic_phrases = [
         "issue", "have an issue", "having an issue", "got an issue",
         "problem", "have a problem", "help", "need help", "support",
@@ -308,20 +287,17 @@ STRICT_OVERLAP = 3             # ≥3 shared terms → treat as exact match
 MAX_SENTENCES_STRICT = 4       # limit for exact-mode
 MAX_SENTENCES_CONCISE = 3      # limit for partial-mode
 def _normalize_for_match(text: str) -> str:
     t = (text or "").lower()
     t = re.sub(r"[^\w\s]", " ", t)      # remove punctuation
     t = re.sub(r"\s+", " ", t).strip()  # collapse spaces
     return t
 def _split_sentences(ctx: str) -> list[str]:
     # crude sentence split: punctuation/newlines/bullets/dashes
     raw_sents = re.split(r"(?<=[.!?])\s+|\n+|•\s*|-\s*", ctx or "")
     return [s.strip() for s in raw_sents if s and len(s.strip()) > 2]
 def _filter_context_for_query(context: str, query: str) -> tuple[str, dict]:
     """
     Returns (filtered_text, info) where filtered_text is:
@@ -376,6 +352,44 @@ def _filter_context_for_query(context: str, query: str) -> tuple[str, dict]:
         'all_sentences': len(sentences)
     }
 # ---------- Health ----------
 @app.get("/")
 async def health_check():
@@ -593,12 +607,23 @@ async def chat_with_ai(input_data: ChatInput):
         kb_ctx = extract_kb_context(kb_results, top_chunks=2)
         context_raw = kb_ctx.get("context", "") or ""
-        # NEW: filter to exact/concise and always preserve original order of matched sentences
         filtered_text, filt_info = _filter_context_for_query(context_raw, input_data.user_message)
         context = filtered_text
         context_found = bool(kb_ctx.get("context_found", False)) and bool(context.strip())
         best_distance = kb_ctx.get("best_score")    # lower = better
         best_combined = kb_ctx.get("best_combined") # higher = better
         # Dynamic gating
         short_query = len((input_data.user_message or "").split()) <= 4
@@ -631,22 +656,21 @@ async def chat_with_ai(input_data: ChatInput):
         # We have KB context → LLM rewrite (KB‑only, no Source lines)
         threshold_ok = gate_combined_ok
         mode_note = (
-            "Return ONLY the matched lines from the context in the same order. "
-            "Do not add any other lines or generic instructions."
             if filt_info.get("mode") == "exact" else
-            "Return a short, meaningful snippet (2-3 sentences) strictly based on the context."
         )
         enhanced_prompt = (
-            "Rewrite the following knowledge base context into clear, actionable steps for the user's question. "
-            "Use ONLY the provided context; do NOT add information that is not present in it. "
             f"{mode_note} "
-            "Do NOT include any document names, section titles, or 'Source:' lines in your output.\n\n"
             f"### Context\n{context}\n\n"
             f"### Question\n{input_data.user_message}\n\n"
             "### Output\n"
-            "- Provide concise bullets or sentences.\n"
-            "- If context is insufficient for an exact answer, add: 'This may be partial based on available KB.'\n"
         )
         headers = {"Content-Type": "application/json"}
@@ -670,7 +694,7 @@ async def chat_with_ai(input_data: ChatInput):
             bot_text = ""
         if not bot_text.strip():
-            # Fallback to the filtered context (never the full SOP chunk)
             bot_text = context
         bot_text = _strip_any_source_lines(bot_text).strip()
@@ -703,6 +727,7 @@ async def chat_with_ai(input_data: ChatInput):
                 "http_status": getattr(resp, "status_code", 0),
                 "filter_mode": filt_info.get("mode"),
                 "matched_count": filt_info.get("matched_count"),
             },
         }
@@ -802,7 +827,6 @@ def _set_incident_resolved(sys_id: str) -> bool:
         print(f"[SN PATCH resolve] exception={str(e)}")
         return False
 @app.post("/incident")
 async def raise_incident(input_data: IncidentInput):
     try:
@@ -829,7 +853,6 @@ async def raise_incident(input_data: IncidentInput):
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/generate_ticket_desc")
 async def generate_ticket_desc_ep(input_data: TicketDescInput):
     try:
@@ -867,7 +890,6 @@ async def generate_ticket_desc_ep(input_data: TicketDescInput):
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/incident_status")
 async def incident_status(input_data: TicketStatusInput):
     try:

 from pydantic import BaseModel
 from dotenv import load_dotenv
 from datetime import datetime
 # KB services (Chroma + sentence-transformers + BM25 hybrid)
 from services.kb_creation import (
     collection,
     ingest_documents,
+    hybrid_search_knowledge_base,  # intent-aware hybrid
 )
 # Optional routers/utilities you already have
     prev_status: Optional[str] = None  # "NO_KB_MATCH" | "PARTIAL" | "OK" | None
     last_issue: Optional[str] = None
 class IncidentInput(BaseModel):
     # Request body accepted by the POST /incident endpoint (raise_incident).
     short_description: str  # required
     description: str  # required
     # Optional flag, defaults to False; presumably asks the endpoint to also mark
     # the newly created incident as resolved — confirm in raise_incident.
     mark_resolved: Optional[bool] = False
 class TicketDescInput(BaseModel):
     # Request body accepted by the POST /generate_ticket_desc endpoint.
     # `issue` is required; presumably the user's issue text — confirm in generate_ticket_desc_ep.
     issue: str
 class TicketStatusInput(BaseModel):
     # Request body accepted by the POST /incident_status endpoint. Both fields are optional.
     # NOTE(review): presumably at least one of them must be supplied — confirm in incident_status.
     sys_id: Optional[str] = None  # presumably the ServiceNow record sys_id — confirm
     number: Optional[str] = None  # IncidentID (incident number)
 # ✅ Human‑readable mapping for ServiceNow incident state codes
 STATE_MAP = {
     "1": "New",
         "best_combined": best_combined,
     }
 def _strip_any_source_lines(text: str) -> str:
     lines = text.splitlines()
     kept = []
         kept.append(ln)
     return "\n".join(kept).strip()
 def _build_clarifying_message() -> str:
     return (
         "I couldn’t find matching content in the KB yet. To help me narrow it down, please share:\n\n"
 # ---------- Intent helpers ----------
 def _build_tracking_descriptions(issue_text: str, resolved_text: str) -> tuple[str, str]:
     issue = (issue_text or "").strip()
     resolved = (resolved_text or "").strip()
     short_desc = issue[:100] if issue else (resolved[:100] or "Issue resolved (user confirmation)")
     ).strip()
     return short_desc, long_desc
 def _is_incident_intent(msg_norm: str) -> bool:
     intent_phrases = [
         "create ticket", "create a ticket", "raise ticket", "raise a ticket", "open ticket", "open a ticket",
     ]
     return any(p in msg_norm for p in intent_phrases)
 def _is_feedback_message(msg_norm: str) -> bool:
     feedback_phrases = [
         "issue not resolved", "not resolved", "still not working",
     ]
     return any(p in msg_norm for p in feedback_phrases)
 def _parse_ticket_status_intent(msg_norm: str) -> Dict[str, Optional[str]]:
     status_keywords = [
         "status", "ticket status", "incident status",
                 return {"number": val.upper() if val.lower().startswith("inc") else val}
     return {"number": None, "ask_number": True}
 def _is_resolution_ack_heuristic(msg_norm: str) -> bool:
     phrases = [
         "it is resolved", "resolved", "issue resolved", "problem resolved",
     ]
     return any(p in msg_norm for p in phrases)
 def _has_negation_resolved(msg_norm: str) -> bool:
     neg_phrases = [
         "not resolved", "issue not resolved", "still not working", "not working",
     ]
     return any(p in msg_norm for p in neg_phrases)
 def _classify_resolution_llm(user_message: str) -> bool:
     if not GEMINI_API_KEY:
         return False
     except Exception:
         return False
 def _is_generic_issue(msg_norm: str) -> bool:
     generic_phrases = [
         "issue", "have an issue", "having an issue", "got an issue",
         "problem", "have a problem", "help", "need help", "support",
 MAX_SENTENCES_STRICT = 4       # limit for exact-mode
 MAX_SENTENCES_CONCISE = 3      # limit for partial-mode
 def _normalize_for_match(text: str) -> str:
     t = (text or "").lower()
     t = re.sub(r"[^\w\s]", " ", t)      # remove punctuation
     t = re.sub(r"\s+", " ", t).strip()  # collapse spaces
     return t
 def _split_sentences(ctx: str) -> list[str]:
     # crude sentence split: punctuation/newlines/bullets/dashes
     raw_sents = re.split(r"(?<=[.!?])\s+|\n+|•\s*|-\s*", ctx or "")
     return [s.strip() for s in raw_sents if s and len(s.strip()) > 2]
 def _filter_context_for_query(context: str, query: str) -> tuple[str, dict]:
     """
     Returns (filtered_text, info) where filtered_text is:
         'all_sentences': len(sentences)
     }
+# ---------- NEW: intent-specific line extractors (steps/navigation/errors) ----------
+# Matches the start of a step-like line: optional leading whitespace, then either
+# digits followed by "." and whitespace ("1. ...") or a "•"/"-" bullet followed by whitespace.
+STEP_LINE_REGEX = re.compile(r"^\s*(?:\d+\.\s+|[•\-]\s+)")
+# Matches, case-insensitively, the phrase "navigate to" or any ">" character anywhere in a line
+# (presumably ">" is meant as a menu-path separator such as "Menu > Submenu" — confirm against KB text).
+NAV_LINE_REGEX  = re.compile(r"(navigate\s+to|>\s*)", re.IGNORECASE)
+def _extract_steps_only(text: str, max_lines: int = 12) -> str:
+    """
+    Keep only numbered/bulleted lines in original order.
+    Accepts formats like '1. ...', '2. ...', '• ...', '- ...'.
+    """
+    lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
+    kept = []
+    for ln in lines:
+        if STEP_LINE_REGEX.match(ln):
+            kept.append(ln)
+            if len(kept) >= max_lines:
+                break
+    return "\n".join(kept).strip() if kept else (text or "").strip()
+def _extract_navigation_only(text: str, max_lines: int = 6) -> str:
+    lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
+    kept = []
+    for ln in lines:
+        if NAV_LINE_REGEX.search(ln) or ln.lower().startswith("log in"):
+            kept.append(ln)
+            if len(kept) >= max_lines:
+                break
+    return "\n".join(kept).strip() if kept else (text or "").strip()
+def _extract_errors_only(text: str, max_lines: int = 10) -> str:
+    lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
+    kept = []
+    for ln in lines:
+        if STEP_LINE_REGEX.match(ln) or ln.lower().startswith(("error", "resolution", "fix", "verify", "check")):
+            kept.append(ln)
+            if len(kept) >= max_lines:
+                break
+    return "\n".join(kept).strip() if kept else (text or "").strip()
 # ---------- Health ----------
 @app.get("/")
 async def health_check():
         kb_ctx = extract_kb_context(kb_results, top_chunks=2)
         context_raw = kb_ctx.get("context", "") or ""
+        # Filter to exact/concise and always preserve original order of matched sentences
         filtered_text, filt_info = _filter_context_for_query(context_raw, input_data.user_message)
         context = filtered_text
         context_found = bool(kb_ctx.get("context_found", False)) and bool(context.strip())
         best_distance = kb_ctx.get("best_score")    # lower = better
         best_combined = kb_ctx.get("best_combined") # higher = better
+        detected_intent = kb_results.get("user_intent", "neutral")
+        # Intent-shaped extraction (steps/navigation/errors)
+        q = (input_data.user_message or "").lower()
+        if detected_intent == "steps" or any(k in q for k in ["steps", "procedure", "perform", "do", "process"]):
+            context = _extract_steps_only(context, max_lines=12)
+        elif detected_intent == "errors" or any(k in q for k in ["error", "issue", "fail", "not working", "resolution", "fix"]):
+            context = _extract_errors_only(context, max_lines=10)
+        elif any(k in q for k in ["navigate", "navigation", "menu", "screen"]):
+            context = _extract_navigation_only(context, max_lines=6)
+        # else: leave context as-is (concise filter already applied)
         # Dynamic gating
         short_query = len((input_data.user_message or "").split()) <= 4
         # We have KB context → LLM rewrite (KB‑only, no Source lines)
         threshold_ok = gate_combined_ok
         mode_note = (
+            "Return ONLY the matched lines from the context in the same order."
             if filt_info.get("mode") == "exact" else
+            "Return a short, meaningful snippet strictly based on the context."
         )
         enhanced_prompt = (
+            "From the provided context, output only the actionable steps/procedure relevant to the user's question. "
+            "Use ONLY the provided context; do NOT add information that is not present. "
             f"{mode_note} "
+            "Do NOT include any document names, section titles, or 'Source:' lines.\n\n"
             f"### Context\n{context}\n\n"
             f"### Question\n{input_data.user_message}\n\n"
             "### Output\n"
+            "- Return numbered/bulleted steps only, in the same order.\n"
+            "- If context is insufficient, add: 'This may be partial based on available KB.'\n"
         )
         headers = {"Content-Type": "application/json"}
             bot_text = ""
         if not bot_text.strip():
+            # Fallback to the filtered/intent-shaped context (never the full SOP chunk)
             bot_text = context
         bot_text = _strip_any_source_lines(bot_text).strip()
                 "http_status": getattr(resp, "status_code", 0),
                 "filter_mode": filt_info.get("mode"),
                 "matched_count": filt_info.get("matched_count"),
+                "user_intent": detected_intent,
             },
         }
         print(f"[SN PATCH resolve] exception={str(e)}")
         return False
 @app.post("/incident")
 async def raise_incident(input_data: IncidentInput):
     try:
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/generate_ticket_desc")
 async def generate_ticket_desc_ep(input_data: TicketDescInput):
     try:
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/incident_status")
 async def incident_status(input_data: TicketStatusInput):
     try: