Spaces:

ChatbotNova
/

Chatbot-Backend

Sleeping

App Files Files Community

srilakshu012456 committed on Dec 24, 2025

Commit

960dcf6

verified ·

1 Parent(s): 76caf03

Update main.py

Browse files

Files changed (1) hide show

main.py +211 -299

main.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 import json
 import re
@@ -10,21 +11,20 @@ from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from dotenv import load_dotenv
 from datetime import datetime
 from services.kb_creation import (
     collection,
     ingest_documents,
     hybrid_search_knowledge_base,
-    get_section_text,             # NEW
-    get_best_steps_section_text,  # NEW
 )
 from services.login import router as login_router
 from services.generate_ticket import get_valid_token, create_incident
 VERIFY_SSL = os.getenv("SERVICENOW_SSL_VERIFY", "true").lower() in ("1", "true", "yes")
 GEMINI_SSL_VERIFY = os.getenv("GEMINI_SSL_VERIFY", "true").lower() in ("1", "true", "yes")
 def safe_str(e: Any) -> str:
     try:
         return builtins.str(e)
@@ -39,12 +39,12 @@ async def lifespan(app: FastAPI):
     try:
         folder_path = os.path.join(os.getcwd(), "documents")
         if collection.count() == 0:
-            print("🔍 KB empty. Running ingestion...")
             ingest_documents(folder_path)
         else:
-            print(f"✅ KB already populated with {collection.count()} entries. Skipping ingestion.")
     except Exception as e:
-        print(f"⚠️ KB ingestion failed: {safe_str(e)}")
     yield
 app = FastAPI(lifespan=lifespan)
@@ -91,76 +91,58 @@ GEMINI_URL = (
     f"gemini-2.5-flash-lite:generateContent?key={GEMINI_API_KEY}"
 )
-def extract_kb_context(kb_results: Optional[Dict[str, Any]], top_chunks: int = 2) -> Dict[str, Any]:
-    if not kb_results or not isinstance(kb_results, dict):
-        return {"context": "", "sources": [], "top_hits": [], "context_found": False, "best_score": None, "best_combined": None}
-    documents = kb_results.get("documents") or []
-    metadatas = kb_results.get("metadatas") or []
-    distances = kb_results.get("distances") or []
-    combined = kb_results.get("combined_scores") or []
-    items = []
-    for i, doc in enumerate(documents):
-        text = doc.strip() if isinstance(doc, str) else ""
-        if not text:
-            continue
-        meta = metadatas[i] if i < len(metadatas) and isinstance(metadatas[i], dict) else {}
-        score = distances[i] if i < len(distances) else None
-        comb = combined[i] if i < len(combined) else None
-        m = dict(meta)
-        if score is not None:
-            m["distance"] = score
-        if comb is not None:
-            m["combined"] = comb
-        items.append({"text": text, "meta": m})
-    selected = items[:max(1, top_chunks)]
-    context = "\n\n---\n\n".join([s["text"] for s in selected]) if selected else ""
-    sources = [s["meta"] for s in selected]
-    best_distance = None
-    if distances:
-        try:
-            best_distance = min([d for d in distances if d is not None])
-        except Exception:
-            best_distance = None
-    best_combined = None
-    if combined:
-        try:
-            best_combined = max([c for c in combined if c is not None])
-        except Exception:
-            best_combined = None
-    return {
-        "context": context,
-        "sources": sources,
-        "top_hits": [],
-        "context_found": bool(selected),
-        "best_score": best_distance,
-        "best_combined": best_combined,
-    }
-def _strip_any_source_lines(text: str) -> str:
-    lines = text.splitlines()
-    kept = []
-    for ln in lines:
-        if re.match(r"^\s*source\s*:", ln, flags=re.IGNORECASE):
             continue
-        kept.append(ln)
-    return "\n".join(kept).strip()
 def _build_clarifying_message() -> str:
     return (
-        "I couldn’t find matching content in the KB yet. To help me narrow it down, please share:\n\n"
-        "• Module/area (e.g., Picking, Receiving, Trailer Close)\n"
-        "• Exact error message text/code (copy-paste)\n"
-        "• IDs involved (Order#, Load ID, Shipment#)\n"
-        "• Warehouse/site & environment (prod/test)\n"
-        "• When it started and how many users are impacted\n\n"
-        "Reply with these details and I’ll search again."
     )
 def _build_tracking_descriptions(issue_text: str, resolved_text: str) -> Tuple[str, str]:
     issue = (issue_text or "").strip()
     resolved = (resolved_text or "").strip()
@@ -181,14 +163,6 @@ def _is_incident_intent(msg_norm: str) -> bool:
     ]
     return any(p in msg_norm for p in intent_phrases)
-def _is_feedback_message(msg_norm: str) -> bool:
-    feedback_phrases = [
-        "issue not resolved", "not resolved", "still not working",
-        "same issue", "no change", "didn't work", "doesn't work",
-        "not fixed", "still failing", "failed again",
-    ]
-    return any(p in msg_norm for p in feedback_phrases)
 def _parse_ticket_status_intent(msg_norm: str) -> Dict[str, Optional[str]]:
     status_keywords = ["status", "ticket status", "incident status", "check status", "check ticket status", "check incident status"]
     if not any(k in msg_norm for k in status_keywords):
@@ -221,8 +195,11 @@ def _classify_resolution_llm(user_message: str) -> bool:
     if not GEMINI_API_KEY:
         return False
     prompt = (
-        "Classify if the following user message indicates that the issue is resolved or working now.\n"
-        "Return only 'true' or 'false'.\n\n"
         f"Message: {user_message}"
     )
     headers = {"Content-Type": "application/json"}
@@ -240,16 +217,7 @@ def _classify_resolution_llm(user_message: str) -> bool:
     except Exception:
         return False
-def _is_generic_issue(msg_norm: str) -> bool:
-    generic_phrases = [
-        "issue", "have an issue", "having an issue", "got an issue",
-        "problem", "have a problem", "help", "need help", "support",
-        "need support", "please help", "need assistance", "assist me",
-        "facing issue", "facing a problem", "got a problem"
-    ]
-    return any(p == msg_norm or p in msg_norm for p in generic_phrases) or len(msg_norm.split()) <= 2
-# ---------- Query-normalized, order-preserving filter ----------
 STRICT_OVERLAP = 3
 MAX_SENTENCES_STRICT = 4
 MAX_SENTENCES_CONCISE = 3
@@ -261,7 +229,8 @@ def _normalize_for_match(text: str) -> str:
     return t
 def _split_sentences(ctx: str) -> List[str]:
-    raw_sents = re.split(r"(?<=[.!?])\s+|\n+|•\s*|-\s*", ctx or "")
     return [s.strip() for s in raw_sents if s and len(s.strip()) > 2]
 def _filter_context_for_query(context: str, query: str) -> Tuple[str, Dict[str, Any]]:
@@ -284,109 +253,49 @@ def _filter_context_for_query(context: str, query: str) -> Tuple[str, Dict[str,
             matched_any.append(s)
     if matched_exact:
         kept = matched_exact[:MAX_SENTENCES_STRICT]
-        return "\n".join(kept).strip(), {'mode': 'exact', 'matched_count': len(kept), 'all_sentences': len(sentences)}
     if matched_any:
         kept = matched_any[:MAX_SENTENCES_CONCISE]
-        return "\n".join(kept).strip(), {'mode': 'concise', 'matched_count': len(kept), 'all_sentences': len(sentences)}
     kept = sentences[:MAX_SENTENCES_CONCISE]
-    return "\n".join(kept).strip(), {'mode': 'concise', 'matched_count': 0, 'all_sentences': len(sentences)}
-# ---------- intent & action specific extractors ----------
-STEP_LINE_REGEX = re.compile(r"^\s*(?:\d+[\.\)]\s+|[•\-]\s+)", re.IGNORECASE)
-NAV_LINE_REGEX  = re.compile(r"(navigate\s+to|>\s*)", re.IGNORECASE)
-PROCEDURE_VERBS = [
-    "log in", "select", "scan", "verify", "confirm", "print",
-    "move", "complete", "click", "open", "navigate", "choose",
-    "enter", "update", "save", "delete", "create", "attach", "assign"
-]
-VERB_START_REGEX = re.compile(r"^\s*(?:" + "|".join([re.escape(v) for v in PROCEDURE_VERBS]) + r")\b", re.IGNORECASE)
-NON_PROC_PHRASES = [
-    "to ensure", "as per", "purpose", "pre-requisites", "prerequisites", "overview", "introduction",
-    "organized manner", "structured", "help users", "objective"
-]
-NON_PROC_ANY_REGEX = re.compile("|".join([re.escape(v) for v in NON_PROC_PHRASES]), re.IGNORECASE)
-ACTION_SYNS_FLAT = {
-    "create": ["create", "creation", "add", "new", "generate"],
-    "update": ["update", "modify", "change", "edit"],
-    "delete": ["delete", "remove"],
-    "navigate": ["navigate", "go to", "open"],
-}
-def _action_in_line(ln: str, target_actions: List[str]) -> bool:
-    s = (ln or "").lower()
-    for act in target_actions:
-        for syn in ACTION_SYNS_FLAT.get(act, [act]):
-            if syn in s:
-                return True
-    return False
-def _is_procedural_line(ln: str) -> bool:
-    s = (ln or "").strip()
-    if not s:
-        return False
-    if NON_PROC_ANY_REGEX.search(s):
-        return False
-    if STEP_LINE_REGEX.match(s):
-        if s.lstrip().startswith(("•", "-")):
-            return bool(VERB_START_REGEX.search(s) or NAV_LINE_REGEX.search(s))
-        return True
-    if VERB_START_REGEX.match(s):
-        return True
-    if NAV_LINE_REGEX.search(s):
-        return True
-    return False
-def _extract_steps_only(text: str, max_lines: Optional[int] = 12, target_actions: Optional[List[str]] = None) -> str:
-    lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
-    kept = []
-    for ln in lines:
-        if _is_procedural_line(ln):
-            if target_actions:
-                if not _action_in_line(ln, target_actions):
-                    continue
-            kept.append(ln)
-            if max_lines is not None and len(kept) >= max_lines:
-                break
-    return "\n".join(kept).strip() if kept else (text or "").strip()
 def _extract_navigation_only(text: str, max_lines: int = 6) -> str:
     lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
-    kept = []
     for ln in lines:
-        if NAV_LINE_REGEX.search(ln) or ln.lower().startswith("log in"):
             kept.append(ln)
-            if len(kept) >= max_lines:
-                break
-    return "\n".join(kept).strip() if kept else (text or "").strip()
-def _extract_errors_only(text: str, max_lines: int = 10) -> str:
     lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
-    kept = []
     for ln in lines:
-        if STEP_LINE_REGEX.match(ln) or ln.lower().startswith(("error", "resolution", "fix", "verify", "check")):
             kept.append(ln)
-            if len(kept) >= max_lines:
-                break
-    return "\n".join(kept).strip() if kept else (text or "").strip()
-def _format_steps_markdown(lines: List[str]) -> str:
-    """
-    Convert a list of step lines into a clean Markdown numbered list.
-    Keeps original order, trims whitespace, skips empty lines.
-    """
-    items = []
-    for i, ln in enumerate(lines, start=1):
-        s = (ln or "").strip()
-        if not s:
-            continue
-        # If the line already has leading "1. " or "• ", strip it so numbering is consistent
-        s = re.sub(r"^\s*(?:\d+[\.\)]\s+|[•\-]\s+)", "", s).strip()
-        items.append(f"{i}. {s}")
-    return "\n".join(items).strip()
 @app.get("/")
 async def health_check():
     return {"status": "ok"}
@@ -407,7 +316,7 @@ async def chat_with_ai(input_data: ChatInput):
             }
         if msg_norm in ("no", "no thanks", "nope"):
             return {
-                "bot_response": "Glad I could help! 👋 If you need anything else later, just let me know.",
                 "status": "OK",
                 "end_chat": True,
                 "followup": None,
@@ -415,7 +324,7 @@ async def chat_with_ai(input_data: ChatInput):
                 "debug": {"intent": "end_conversation"},
             }
-        # --- Resolution ack ---
         is_llm_resolved = _classify_resolution_llm(input_data.user_message)
         if _has_negation_resolved(msg_norm):
             is_llm_resolved = False
@@ -472,8 +381,12 @@ async def chat_with_ai(input_data: ChatInput):
         if _is_incident_intent(msg_norm):
             return {
                 "bot_response": (
-                    "Okay, let’s create a ServiceNow incident.\n\n"
-                    "Please provide:\n• Short Description (one line)\n"
                     "• Detailed Description (steps, error text, IDs, site, environment)"
                 ),
                 "status": (input_data.prev_status or "PARTIAL"),
@@ -488,21 +401,14 @@ async def chat_with_ai(input_data: ChatInput):
             }
         # --- Generic opener ---
-        if _is_generic_issue(msg_norm):
             return {
-                "bot_response": (
-                    "Sure, I can help. Please describe your issue:\n"
-                    "• Module/area (e.g., Picking, Receiving, Trailer Close)\n"
-                    "• Exact error message text/code (copy-paste)\n"
-                    "• IDs involved (Order#, Load ID, Shipment#)\n"
-                    "• Warehouse/site & environment (prod/test)\n"
-                    "• When it started and how many users are impacted"
-                ),
                 "status": "NO_KB_MATCH",
                 "context_found": False,
                 "ask_resolved": False,
-                "suggest_incident": False,
-                "followup": "Please reply with the above details.",
                 "top_hits": [],
                 "sources": [],
                 "debug": {"intent": "generic_issue"},
@@ -542,8 +448,10 @@ async def chat_with_ai(input_data: ChatInput):
                 num = result.get("number", number or "unknown")
                 return {
                     "bot_response": (
-                        f"**Ticket:** {num} \n"
-                        f"**Status:** {state_label} \n"
                         f"**Issue description:** {short}"
                     ),
                     "status": "OK",
@@ -561,48 +469,67 @@ async def chat_with_ai(input_data: ChatInput):
         # --- Hybrid KB search ---
         kb_results = hybrid_search_knowledge_base(input_data.user_message, top_k=10, alpha=0.6, beta=0.4)
-        kb_ctx = extract_kb_context(kb_results, top_chunks=2)
-        context_raw = kb_ctx.get("context", "") or ""
         filtered_text, filt_info = _filter_context_for_query(context_raw, input_data.user_message)
         context = filtered_text
-        context_found = bool(kb_ctx.get("context_found", False)) and bool(context.strip())
-        best_distance = kb_ctx.get("best_score")
-        best_combined = kb_ctx.get("best_combined")
         detected_intent = kb_results.get("user_intent", "neutral")
         actions = kb_results.get("actions", [])
         best_doc = kb_results.get("best_doc")
-        top_meta = (kb_results.get("metadatas") or [{}])[0] if (kb_results.get("metadatas") or []) else {}
-        # --- FULL SECTION when strongly found & steps intent ---
         if detected_intent == "steps" and best_doc:
-            # prefer full 'Process Steps' section from the best SOP
             full_steps = get_best_steps_section_text(best_doc)
             if not full_steps:
-                # fallback: full text of the top section
                 sec = (top_meta or {}).get("section")
                 if sec:
                     full_steps = get_section_text(best_doc, sec)
             if full_steps:
-                # If confidence is high, return ALL procedural lines (no truncation)
-                high_conf = (best_combined is not None and best_combined >= 0.75)
-                context = _extract_steps_only(full_steps, max_lines=None if high_conf else 20, target_actions=actions)
-        # Intent-shaping (secondary; if not already handled above)
         q = (input_data.user_message or "").lower()
-        if detected_intent == "steps" or any(k in q for k in ["steps", "procedure", "perform", "do", "process"]):
-            context = _extract_steps_only(context, max_lines=None if (best_combined and best_combined >= 0.75) else 12, target_actions=actions)
-        elif detected_intent == "errors" or any(k in q for k in ["error", "issue", "fail", "not working", "resolution", "fix"]):
-            context = _extract_errors_only(context, max_lines=10)
         elif any(k in q for k in ["navigate", "navigation", "menu", "screen"]):
             context = _extract_navigation_only(context, max_lines=6)
         # Gating
         short_query = len((input_data.user_message or "").split()) <= 4
         gate_combined_no_kb = 0.22 if short_query else 0.28
         gate_combined_ok = 0.60 if short_query else 0.55
         gate_distance_no_kb = 2.0
         if (not context_found or not context.strip()) or (
             (best_combined is None or best_combined < gate_combined_no_kb)
             and (best_distance is None or best_distance >= gate_distance_no_kb)
@@ -618,29 +545,38 @@ async def chat_with_ai(input_data: ChatInput):
                 "status": "NO_KB_MATCH",
                 "context_found": False,
                 "ask_resolved": False,
-                "suggest_incident": bool(second_try),
-                "followup": ("Please reply with the above details." if not second_try else "Shall I create a ticket now?"),
                 "top_hits": [],
                 "sources": [],
                 "debug": {"used_chunks": 0, "second_try": second_try, "best_distance": best_distance, "best_combined": best_combined},
             }
-        # LLM rewrite (kept, but we still fallback cleanly)
         enhanced_prompt = (
-            "From the provided context, output only the actionable steps/procedure relevant to the user's question. "
-            "Use ONLY the provided context; do NOT add information that is not present. "
-            + ("Return ONLY lines containing the requested action verbs. " if actions else "")
-            + "Do NOT include document names, section titles, or 'Source:' lines.\n\n"
-            f"### Context\n{context}\n\n"
-            f"### Question\n{input_data.user_message}\n\n"
-            "### Output\n"
-            "- Return numbered/bulleted steps in the same order.\n"
-            "- If context is insufficient, add: 'This may be partial based on available KB.'\n"
         )
         headers = {"Content-Type": "application/json"}
         payload = {"contents": [{"parts": [{"text": enhanced_prompt}]}]}
         try:
-            resp = requests.post(GEMINI_URL, headers=headers, json=payload, timeout=25, verify=VERIFY_SSL)
             try:
                 result = resp.json()
             except Exception:
@@ -648,42 +584,17 @@ async def chat_with_ai(input_data: ChatInput):
         except Exception:
             resp = type("RespStub", (), {"status_code": 0})()
             result = {}
         try:
-            bot_text = (result["candidates"][0]["content"]["parts"][0]["text"] if isinstance(result, dict) else "")
         except Exception:
             bot_text = ""
         if not bot_text.strip():
             bot_text = context
-        bot_text = _strip_any_source_lines(bot_text).strip()
-# If the intent is steps, render lines as a numbered Markdown list
-        if kb_results.get("user_intent", "neutral") == "steps":
-           raw_lines = [ln.strip() for ln in bot_text.splitlines() if ln.strip()]
-    # If everything is on a single line, split defensively on ". "
-           if len(raw_lines) == 1:
-              parts = [p.strip() for p in re.split(r"\.\s+(?=[A-Z0-9])", raw_lines[0]) if p.strip()]
-              raw_lines = parts if len(parts) > 1 else raw_lines
-    # 🔴 NEW: merge number-only lines with the next line
-        merged: list[str] = []
-        i = 0
-        while i < len(raw_lines):
-           curr = raw_lines[i]
-        # A number-only line (e.g., "1", "2", "3")
-           if re.fullmatch(r"\d+", curr) and (i + 1) < len(raw_lines):
-              nxt = raw_lines[i + 1].strip()
-            # Combine into one line: "1. <next line text>"
-              merged.append(f"{curr}. {nxt}")
-              i += 2  # skip the next line; already merged
-           else:
-               merged.append(curr)
-               i += 1
-    # Finally: normalize and render as Markdown numbered list
-        bot_text = _format_steps_markdown(merged)
         status = "OK" if (
             (best_combined is not None and best_combined >= gate_combined_ok)
@@ -699,12 +610,16 @@ async def chat_with_ai(input_data: ChatInput):
             "status": status,
             "context_found": True,
             "ask_resolved": (status == "OK"),
-            "suggest_incident": False,
-            "followup": ("Does this match your scenario? I can refine the steps." if status == "PARTIAL" else None),
             "top_hits": [],
             "sources": [],
             "debug": {
-                "used_chunks": len(context.split("\n\n---\n\n")) if context else 0,
                 "best_distance": best_distance,
                 "best_combined": best_combined,
                 "http_status": getattr(resp, "status_code", 0),
@@ -721,6 +636,7 @@ async def chat_with_ai(input_data: ChatInput):
     except Exception as e:
         raise HTTPException(status_code=500, detail=safe_str(e))
 def _set_incident_resolved(sys_id: str) -> bool:
     try:
         token = get_valid_token()
@@ -734,14 +650,12 @@ def _set_incident_resolved(sys_id: str) -> bool:
             "Content-Type": "application/json",
         }
         url = f"{instance_url}/api/now/table/incident/{sys_id}"
         close_code_val = os.getenv("SERVICENOW_CLOSE_CODE", "Solution provided")
         close_notes_val = os.getenv("SERVICENOW_RESOLUTION_NOTES", "Issue resolved, user confirmed")
         caller_sysid = os.getenv("SERVICENOW_CALLER_SYSID")
         resolved_by_sysid = os.getenv("SERVICENOW_RESOLVED_BY_SYSID")
         assign_group = os.getenv("SERVICENOW_ASSIGNMENT_GROUP_SYSID")
         require_progress = os.getenv("SERVICENOW_REQUIRE_IN_PROGRESS_FIRST", "false").lower() in ("1", "true", "yes")
         if require_progress:
             try:
                 resp1 = requests.patch(url, headers=headers, json={"state": "2"}, verify=VERIFY_SSL, timeout=25)
@@ -814,16 +728,16 @@ async def raise_incident(input_data: IncidentInput):
             if bool(input_data.mark_resolved) and sys_id not in ("<unknown>", None):
                 ok = _set_incident_resolved(sys_id)
                 resolved_note = " (marked Resolved)" if ok else " (could not mark Resolved; please update manually)"
-            ticket_text = f"Incident created: {inc_number}{resolved_note}"
         else:
-            ticket_text = "Incident created."
-        return {
-            "bot_response": f"✅ {ticket_text}",
-            "debug": "Incident created via ServiceNow",
-            "persist": True,
-            "show_assist_card": True,
-            "followup": "Is there anything else I can assist you with?",
-        }
     except Exception as e:
         raise HTTPException(status_code=500, detail=safe_str(e))
@@ -831,17 +745,23 @@ async def raise_incident(input_data: IncidentInput):
 async def generate_ticket_desc_ep(input_data: TicketDescInput):
     try:
         prompt = (
-            f"You are helping generate ServiceNow ticket descriptions based on the issue: {input_data.issue}.\n"
-            "Please return the output strictly in JSON format with the following keys:\n"
-            "{\n"
-            ' "ShortDescription": "A concise summary of the issue (max 100 characters)",\n'
-            ' "DetailedDescription": "A detailed explanation of the issue"\n'
-            "}\n"
             "Do not include any extra text, comments, or explanations outside the JSON."
         )
         headers = {"Content-Type": "application/json"}
         payload = {"contents": [{"parts": [{"text": prompt}]}]}
-        resp = requests.post(GEMINI_URL, headers=headers, json=payload, timeout=25, verify=GEMINI_GSL_VERIFY if 'GEMINI_GSL_VERIFY' in globals() else GEMINI_SSL_VERIFY)
         try:
             data = resp.json()
         except Exception:
@@ -852,7 +772,8 @@ async def generate_ticket_desc_ep(input_data: TicketDescInput):
             return {"ShortDescription": "", "DetailedDescription": "", "error": "Gemini parsing failed"}
         if text.startswith("```"):
             lines = [ln for ln in text.splitlines() if not ln.strip().startswith("```")]
-            text = "\n".join(lines).strip()
         try:
             ticket_json = json.loads(text)
             return {
@@ -891,10 +812,14 @@ async def incident_status(input_data: TicketStatusInput):
         number = result.get("number", input_data.number or "unknown")
         return {
             "bot_response": (
-                f"**Ticket:** {number} \n"
-                f"**Status:** {state_label} \n"
                 f"**Issue description:** {short}"
-            ).replace("\n", " \n"),
             "followup": "Is there anything else I can assist you with?",
             "show_assist_card": True,
             "persist": True,
@@ -902,16 +827,3 @@ async def incident_status(input_data: TicketStatusInput):
         }
     except Exception as e:
         raise HTTPException(status_code=500, detail=safe_str(e))
-# ---- Admin endpoints (optional) ----
-@app.get("/kb/info")
-async def kb_info():
-    from services.kb_creation import get_kb_runtime_info
-    return get_kb_runtime_info()
-@app.post("/kb/reset")
-async def kb_reset():
-    from services.kb_creation import reset_kb
-    folder_path = os.path.join(os.getcwd(), "documents")
-    return reset_kb(folder_path)

 import os
 import json
 import re
 from pydantic import BaseModel
 from dotenv import load_dotenv
 from datetime import datetime
 from services.kb_creation import (
     collection,
     ingest_documents,
     hybrid_search_knowledge_base,
+    get_section_text,
+    get_best_steps_section_text,
 )
 from services.login import router as login_router
 from services.generate_ticket import get_valid_token, create_incident
 VERIFY_SSL = os.getenv("SERVICENOW_SSL_VERIFY", "true").lower() in ("1", "true", "yes")
 GEMINI_SSL_VERIFY = os.getenv("GEMINI_SSL_VERIFY", "true").lower() in ("1", "true", "yes")
 def safe_str(e: Any) -> str:
     try:
         return builtins.str(e)
     try:
         folder_path = os.path.join(os.getcwd(), "documents")
         if collection.count() == 0:
+            print("[KB] empty. Running ingestion...")
             ingest_documents(folder_path)
         else:
+            print(f"[KB] already populated with {collection.count()} entries. Skipping ingestion.")
     except Exception as e:
+        print(f"[KB] ingestion failed: {safe_str(e)}")
     yield
 app = FastAPI(lifespan=lifespan)
     f"gemini-2.5-flash-lite:generateContent?key={GEMINI_API_KEY}"
 )
+# ---------------- Helper: consistently format numbered steps ----------------
+def _format_steps_markdown(lines: List[str]) -> str:
+    items: List[str] = []
+    for i, ln in enumerate(lines, start=1):
+        s = (ln or "").strip()
+        if not s:
             continue
+        s = re.sub(r"^\s*(?:\d+[\.\)]\s+|[•\-]\s+)", "", s).strip()
+        items.append(f"{i}. {s}")
+    return "\n".join(items).strip()
+def _as_numbered_steps(text: str) -> str:
+    raw_lines: List[str] = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
+    if len(raw_lines) == 1:
+        parts = [p.strip() for p in re.split(r"\.\s+(?=[A-Z0-9])", raw_lines[0]) if p.strip()]
+        if len(parts) > 1:
+            raw_lines = parts
+    merged: List[str] = []
+    i = 0
+    while i < len(raw_lines):
+        curr = raw_lines[i]
+        if re.fullmatch(r"\d+", curr) and (i + 1) < len(raw_lines):
+            nxt = raw_lines[i + 1].strip()
+            merged.append(f"{curr}. {nxt}")
+            i += 2
+        else:
+            merged.append(curr)
+            i += 1
+    return _format_steps_markdown(merged)
+# ---------------- Clarifying message (with ticket option) ----------------
 def _build_clarifying_message() -> str:
     return (
+        "I couldn’t find matching content in the KB yet. To help me narrow it down, please share:\n"
+        "• Module/area (e.g., Picking, Receiving, Trailer Close)\n"
+        "• Exact error message text/code (copy-paste)\n"
+        "• IDs involved (Order#, Load ID, Shipment#)\n"
+        "• Warehouse/site & environment (prod/test)\n"
+        "• When it started and how many users are impacted\n"
+        "You can also say ‘create ticket’ and I’ll raise a ServiceNow incident now."
     )
+# ---------------- Resolution/Incident helpers ----------------
 def _build_tracking_descriptions(issue_text: str, resolved_text: str) -> Tuple[str, str]:
     issue = (issue_text or "").strip()
     resolved = (resolved_text or "").strip()
     ]
     return any(p in msg_norm for p in intent_phrases)
 def _parse_ticket_status_intent(msg_norm: str) -> Dict[str, Optional[str]]:
     status_keywords = ["status", "ticket status", "incident status", "check status", "check ticket status", "check incident status"]
     if not any(k in msg_norm for k in status_keywords):
     if not GEMINI_API_KEY:
         return False
     prompt = (
+        "Classify if the following user message indicates that the issue is resolved or working now.\n"
+        "Return only 'true' or 'false'.\n"
         f"Message: {user_message}"
     )
     headers = {"Content-Type": "application/json"}
     except Exception:
         return False
+# ---------------- Query-normalized, order-preserving filter ----------------
 STRICT_OVERLAP = 3
 MAX_SENTENCES_STRICT = 4
 MAX_SENTENCES_CONCISE = 3
     return t
 def _split_sentences(ctx: str) -> List[str]:
+    raw_sents = re.split(r"(?<=[.!?])\s+|\n+|•\s*|\-\s*", ctx or "")
     return [s.strip() for s in raw_sents if s and len(s.strip()) > 2]
 def _filter_context_for_query(context: str, query: str) -> Tuple[str, Dict[str, Any]]:
             matched_any.append(s)
     if matched_exact:
         kept = matched_exact[:MAX_SENTENCES_STRICT]
+        return "\n".join(kept).strip(), {'mode': 'exact', 'matched_count': len(kept), 'all_sentences': len(sentences)}
     if matched_any:
         kept = matched_any[:MAX_SENTENCES_CONCISE]
+        return "\n".join(kept).strip(), {'mode': 'concise', 'matched_count': len(kept), 'all_sentences': len(sentences)}
     kept = sentences[:MAX_SENTENCES_CONCISE]
+    return "\n".join(kept).strip(), {'mode': 'concise', 'matched_count': 0, 'all_sentences': len(sentences)}
+# ---------------- Navigation extraction ----------------
+NAV_LINE_REGEX = re.compile(r"(navigate\s+to|login|log in|menu|screen)", re.IGNORECASE)
 def _extract_navigation_only(text: str, max_lines: int = 6) -> str:
     lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
+    kept: List[str] = []
     for ln in lines:
+        if NAV_LINE_REGEX.search(ln):
             kept.append(ln)
+        if len(kept) >= max_lines:
+            break
+    return "\n".join(kept).strip() if kept else (text or "").strip()
+# ---------------- Errors extraction (tightened for auth/role/access) ----------------
+ERROR_STARTS = (
+    "error", "resolution", "fix", "verify", "check",
+    "permission", "access", "authorization", "authorisation",
+    "role", "role mapping", "security profile", "escalation"
+)
+def _extract_errors_only(text: str, max_lines: int = 12) -> str:
     lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
+    kept: List[str] = []
     for ln in lines:
+        low = ln.lower()
+        if low.startswith(ERROR_STARTS) or any(key in low for key in ERROR_STARTS):
             kept.append(ln)
+        if len(kept) >= max_lines:
+            break
+    return "\n".join(kept).strip() if kept else (text or "").strip()
 @app.get("/")
 async def health_check():
     return {"status": "ok"}
             }
         if msg_norm in ("no", "no thanks", "nope"):
             return {
+                "bot_response": "Glad I could help! If you need anything else later, just let me know.",
                 "status": "OK",
                 "end_chat": True,
                 "followup": None,
                 "debug": {"intent": "end_conversation"},
             }
+        # --- Resolution ack (auto incident + mark Resolved) ---
         is_llm_resolved = _classify_resolution_llm(input_data.user_message)
         if _has_negation_resolved(msg_norm):
             is_llm_resolved = False
         if _is_incident_intent(msg_norm):
             return {
                 "bot_response": (
+                    "Okay, let’s create a ServiceNow incident.
+"
+                    "Please provide:
+• Short Description (one line)
+"
                     "• Detailed Description (steps, error text, IDs, site, environment)"
                 ),
                 "status": (input_data.prev_status or "PARTIAL"),
             }
         # --- Generic opener ---
+        if len(msg_norm.split()) <= 2 or any(p in msg_norm for p in ("issue", "problem", "help", "support")):
             return {
+                "bot_response": _build_clarifying_message(),
                 "status": "NO_KB_MATCH",
                 "context_found": False,
                 "ask_resolved": False,
+                "suggest_incident": True,  # offer ticket immediately
+                "followup": "Reply with the above details or say ‘create ticket’.",
                 "top_hits": [],
                 "sources": [],
                 "debug": {"intent": "generic_issue"},
                 num = result.get("number", number or "unknown")
                 return {
                     "bot_response": (
+                        f"**Ticket:** {num}
+"
+                        f"**Status:** {state_label}
+"
                         f"**Issue description:** {short}"
                     ),
                     "status": "OK",
         # --- Hybrid KB search ---
         kb_results = hybrid_search_knowledge_base(input_data.user_message, top_k=10, alpha=0.6, beta=0.4)
+        documents = kb_results.get("documents", [])
+        metadatas = kb_results.get("metadatas", [])
+        distances = kb_results.get("distances", [])
+        combined = kb_results.get("combined_scores", [])
+        items: List[Dict[str, Any]] = []
+        for i, doc in enumerate(documents):
+            text = doc.strip() if isinstance(doc, str) else ""
+            if not text:
+                continue
+            meta = metadatas[i] if i < len(metadatas) and isinstance(metadatas[i], dict) else {}
+            score = distances[i] if i < len(distances) else None
+            comb = combined[i] if i < len(combined) else None
+            m = dict(meta)
+            if score is not None:
+                m["distance"] = score
+            if comb is not None:
+                m["combined"] = comb
+            items.append({"text": text, "meta": m})
+        selected = items[:max(1, 2)]
+        context_raw = "
+---
+".join([s["text"] for s in selected]) if selected else ""
         filtered_text, filt_info = _filter_context_for_query(context_raw, input_data.user_message)
         context = filtered_text
+        context_found = bool(context.strip())
+        best_distance = min([d for d in distances if d is not None], default=None) if distances else None
+        best_combined = max([c for c in combined if c is not None], default=None) if combined else None
         detected_intent = kb_results.get("user_intent", "neutral")
         actions = kb_results.get("actions", [])
         best_doc = kb_results.get("best_doc")
+        top_meta = (metadatas or [{}])[0] if metadatas else {}
         if detected_intent == "steps" and best_doc:
             full_steps = get_best_steps_section_text(best_doc)
             if not full_steps:
                 sec = (top_meta or {}).get("section")
                 if sec:
                     full_steps = get_section_text(best_doc, sec)
             if full_steps:
+                context = _as_numbered_steps(full_steps)
+        # Intent shaping
         q = (input_data.user_message or "").lower()
+        if detected_intent == "errors" or any(k in q for k in [
+            "error", "issue", "fail", "not working", "resolution", "fix",
+            "permission", "access", "authorization", "escalation", "role", "security profile"
+        ]):
+            context = _extract_errors_only(context, max_lines=12)
         elif any(k in q for k in ["navigate", "navigation", "menu", "screen"]):
             context = _extract_navigation_only(context, max_lines=6)
+        elif detected_intent == "steps":
+            context = _as_numbered_steps(context)
         # Gating
         short_query = len((input_data.user_message or "").split()) <= 4
         gate_combined_no_kb = 0.22 if short_query else 0.28
         gate_combined_ok = 0.60 if short_query else 0.55
         gate_distance_no_kb = 2.0
         if (not context_found or not context.strip()) or (
             (best_combined is None or best_combined < gate_combined_no_kb)
             and (best_distance is None or best_distance >= gate_distance_no_kb)
                 "status": "NO_KB_MATCH",
                 "context_found": False,
                 "ask_resolved": False,
+                "suggest_incident": True,
+                "followup": ("Reply with the above details or say ‘create ticket’." if not second_try else "Shall I create a ticket now?"),
                 "top_hits": [],
                 "sources": [],
                 "debug": {"used_chunks": 0, "second_try": second_try, "best_distance": best_distance, "best_combined": best_combined},
             }
+        # LLM rewrite (constrained to provided context)
         enhanced_prompt = (
+            "From the provided context, output only the actionable content relevant to the user's question. "
+            "Use ONLY the provided context; do NOT add information that is not present.
+"
+            f"### Context
+{context}
+"
+            f"### Question
+{input_data.user_message}
+"
+            "### Output
+"
+            "- Return numbered/bulleted steps in the same order when appropriate.
+"
+            "- If context is insufficient, add: 'This may be partial based on available KB.'
+"
         )
         headers = {"Content-Type": "application/json"}
         payload = {"contents": [{"parts": [{"text": enhanced_prompt}]}]}
         try:
+            resp = requests.post(GEMINI_URL, headers=headers, json=payload, timeout=25, verify=GEMINI_SSL_VERIFY)
             try:
                 result = resp.json()
             except Exception:
         except Exception:
             resp = type("RespStub", (), {"status_code": 0})()
             result = {}
         try:
+            bot_text = result["candidates"][0]["content"]["parts"][0]["text"] if isinstance(result, dict) else ""
         except Exception:
             bot_text = ""
         if not bot_text.strip():
             bot_text = context
+        bot_text = "
+".join([ln for ln in bot_text.splitlines() if not re.match(r"^\s*source\s*:", ln, flags=re.IGNORECASE)]).strip()
+        if detected_intent == "steps":
+            bot_text = _as_numbered_steps(bot_text)
         status = "OK" if (
             (best_combined is not None and best_combined >= gate_combined_ok)
             "status": status,
             "context_found": True,
             "ask_resolved": (status == "OK"),
+            "suggest_incident": (status == "PARTIAL"),
+            "followup": ("Is this helpful or should I raise a ticket?" if status == "PARTIAL" else None),
             "top_hits": [],
             "sources": [],
             "debug": {
+                "used_chunks": len(context.split("
+---
+")) if context else 0,
                 "best_distance": best_distance,
                 "best_combined": best_combined,
                 "http_status": getattr(resp, "status_code", 0),
     except Exception as e:
         raise HTTPException(status_code=500, detail=safe_str(e))
 def _set_incident_resolved(sys_id: str) -> bool:
     try:
         token = get_valid_token()
             "Content-Type": "application/json",
         }
         url = f"{instance_url}/api/now/table/incident/{sys_id}"
         close_code_val = os.getenv("SERVICENOW_CLOSE_CODE", "Solution provided")
         close_notes_val = os.getenv("SERVICENOW_RESOLUTION_NOTES", "Issue resolved, user confirmed")
         caller_sysid = os.getenv("SERVICENOW_CALLER_SYSID")
         resolved_by_sysid = os.getenv("SERVICENOW_RESOLVED_BY_SYSID")
         assign_group = os.getenv("SERVICENOW_ASSIGNMENT_GROUP_SYSID")
         require_progress = os.getenv("SERVICENOW_REQUIRE_IN_PROGRESS_FIRST", "false").lower() in ("1", "true", "yes")
         if require_progress:
             try:
                 resp1 = requests.patch(url, headers=headers, json={"state": "2"}, verify=VERIFY_SSL, timeout=25)
             if bool(input_data.mark_resolved) and sys_id not in ("<unknown>", None):
                 ok = _set_incident_resolved(sys_id)
                 resolved_note = " (marked Resolved)" if ok else " (could not mark Resolved; please update manually)"
+            ticket_text = f"Incident created: {inc_number}{resolved_note}" if inc_number else "Incident created."
+            return {
+                "bot_response": f"✅ {ticket_text}",
+                "debug": "Incident created via ServiceNow",
+                "persist": True,
+                "show_assist_card": True,
+                "followup": "Is there anything else I can assist you with?",
+            }
         else:
+            raise HTTPException(status_code=500, detail=(result or {}).get("error", "Unknown error"))
     except Exception as e:
         raise HTTPException(status_code=500, detail=safe_str(e))
 async def generate_ticket_desc_ep(input_data: TicketDescInput):
     try:
         prompt = (
+            f"You are helping generate ServiceNow ticket descriptions based on the issue: {input_data.issue}.
+"
+            "Please return the output strictly in JSON format with the following keys:
+"
+            "{
+"
+            '  "ShortDescription": "A concise summary of the issue (max 100 characters)",
+'
+            '  "DetailedDescription": "A detailed explanation of the issue"
+'
+            "}
+"
             "Do not include any extra text, comments, or explanations outside the JSON."
         )
         headers = {"Content-Type": "application/json"}
         payload = {"contents": [{"parts": [{"text": prompt}]}]}
+        resp = requests.post(GEMINI_URL, headers=headers, json=payload, timeout=25, verify=GEMINI_SSL_VERIFY)
         try:
             data = resp.json()
         except Exception:
             return {"ShortDescription": "", "DetailedDescription": "", "error": "Gemini parsing failed"}
         if text.startswith("```"):
             lines = [ln for ln in text.splitlines() if not ln.strip().startswith("```")]
+            text = "
+".join(lines).strip()
         try:
             ticket_json = json.loads(text)
             return {
         number = result.get("number", input_data.number or "unknown")
         return {
             "bot_response": (
+                f"**Ticket:** {number}
+"
+                f"**Status:** {state_label}
+"
                 f"**Issue description:** {short}"
+            ).replace("
+", "
+"),
             "followup": "Is there anything else I can assist you with?",
             "show_assist_card": True,
             "persist": True,
         }
     except Exception as e:
         raise HTTPException(status_code=500, detail=safe_str(e))