Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -11,49 +11,38 @@ from pydantic import BaseModel
|
|
| 11 |
from dotenv import load_dotenv
|
| 12 |
from datetime import datetime
|
| 13 |
|
| 14 |
-
# KB services (Chroma + sentence-transformers + BM25 hybrid)
|
| 15 |
from services.kb_creation import (
|
| 16 |
collection,
|
| 17 |
ingest_documents,
|
| 18 |
-
hybrid_search_knowledge_base,
|
| 19 |
)
|
| 20 |
|
| 21 |
-
|
| 22 |
-
from services.
|
| 23 |
-
from services.generate_ticket import get_valid_token, create_incident # ServiceNow helpers
|
| 24 |
|
| 25 |
VERIFY_SSL = os.getenv("SERVICENOW_SSL_VERIFY", "true").lower() in ("1", "true", "yes")
|
| 26 |
GEMINI_SSL_VERIFY = os.getenv("GEMINI_SSL_VERIFY", "true").lower() in ("1", "true", "yes")
|
| 27 |
|
| 28 |
-
# ---------- Env & App bootstrap ----------
|
| 29 |
load_dotenv()
|
| 30 |
-
os.environ["POSTHOG_DISABLED"] = "true"
|
| 31 |
-
|
| 32 |
|
| 33 |
@asynccontextmanager
|
| 34 |
async def lifespan(app: FastAPI):
|
| 35 |
-
"""
|
| 36 |
-
On startup: populate KB if empty.
|
| 37 |
-
"""
|
| 38 |
try:
|
| 39 |
folder_path = os.path.join(os.getcwd(), "documents")
|
| 40 |
if collection.count() == 0:
|
| 41 |
print("🔍 KB empty. Running ingestion...")
|
| 42 |
-
ingest_documents(folder_path)
|
| 43 |
else:
|
| 44 |
print(f"✅ KB already populated with {collection.count()} entries. Skipping ingestion.")
|
| 45 |
except Exception as e:
|
| 46 |
print(f"⚠️ KB ingestion failed: {e}")
|
| 47 |
yield
|
| 48 |
|
| 49 |
-
|
| 50 |
app = FastAPI(lifespan=lifespan)
|
| 51 |
app.include_router(login_router)
|
| 52 |
|
| 53 |
-
|
| 54 |
-
origins = [
|
| 55 |
-
"https://chatbotnova-chatbot-frontend.hf.space",
|
| 56 |
-
]
|
| 57 |
app.add_middleware(
|
| 58 |
CORSMiddleware,
|
| 59 |
allow_origins=origins,
|
|
@@ -62,10 +51,9 @@ app.add_middleware(
|
|
| 62 |
allow_headers=["*"],
|
| 63 |
)
|
| 64 |
|
| 65 |
-
# ---------- Models ----------
|
| 66 |
class ChatInput(BaseModel):
|
| 67 |
user_message: str
|
| 68 |
-
prev_status: Optional[str] = None
|
| 69 |
last_issue: Optional[str] = None
|
| 70 |
|
| 71 |
class IncidentInput(BaseModel):
|
|
@@ -78,9 +66,8 @@ class TicketDescInput(BaseModel):
|
|
| 78 |
|
| 79 |
class TicketStatusInput(BaseModel):
|
| 80 |
sys_id: Optional[str] = None
|
| 81 |
-
number: Optional[str] = None
|
| 82 |
|
| 83 |
-
# ✅ Human‑readable mapping for ServiceNow incident state codes
|
| 84 |
STATE_MAP = {
|
| 85 |
"1": "New",
|
| 86 |
"2": "In Progress",
|
|
@@ -90,38 +77,20 @@ STATE_MAP = {
|
|
| 90 |
"8": "Canceled",
|
| 91 |
}
|
| 92 |
|
| 93 |
-
# ---------- Gemini setup ----------
|
| 94 |
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
| 95 |
GEMINI_URL = (
|
| 96 |
f"https://generativelanguage.googleapis.com/v1beta/models/"
|
| 97 |
f"gemini-2.5-flash-lite:generateContent?key={GEMINI_API_KEY}"
|
| 98 |
)
|
| 99 |
|
| 100 |
-
# ---------- Helpers: KB context merge + sanitation ----------
|
| 101 |
def extract_kb_context(kb_results: Optional[Dict[str, Any]], top_chunks: int = 2) -> Dict[str, Any]:
|
| 102 |
-
"""
|
| 103 |
-
Merge documents + metadatas + distances.
|
| 104 |
-
If documents are missing but ids exist, fetch via collection.get (rare).
|
| 105 |
-
Supports hybrid fields: 'combined_scores' in results.
|
| 106 |
-
"""
|
| 107 |
if not kb_results or not isinstance(kb_results, dict):
|
| 108 |
return {"context": "", "sources": [], "top_hits": [], "context_found": False, "best_score": None, "best_combined": None}
|
| 109 |
-
|
| 110 |
documents = kb_results.get("documents") or []
|
| 111 |
metadatas = kb_results.get("metadatas") or []
|
| 112 |
distances = kb_results.get("distances") or []
|
| 113 |
-
ids = kb_results.get("ids") or []
|
| 114 |
combined = kb_results.get("combined_scores") or []
|
| 115 |
|
| 116 |
-
# Fallback fetch by ids if needed
|
| 117 |
-
if (not documents) and ids:
|
| 118 |
-
try:
|
| 119 |
-
fetched = collection.get(ids=ids, include=['documents', 'metadatas'])
|
| 120 |
-
documents = fetched.get('documents', []) or []
|
| 121 |
-
metadatas = fetched.get('metadatas', []) or metadatas
|
| 122 |
-
except Exception:
|
| 123 |
-
pass
|
| 124 |
-
|
| 125 |
items = []
|
| 126 |
for i, doc in enumerate(documents):
|
| 127 |
text = doc.strip() if isinstance(doc, str) else ""
|
|
@@ -141,14 +110,12 @@ def extract_kb_context(kb_results: Optional[Dict[str, Any]], top_chunks: int = 2
|
|
| 141 |
context = "\n\n---\n\n".join([s["text"] for s in selected]) if selected else ""
|
| 142 |
sources = [s["meta"] for s in selected]
|
| 143 |
|
| 144 |
-
# best (lower distance is better, higher combined is better)
|
| 145 |
best_distance = None
|
| 146 |
if distances:
|
| 147 |
try:
|
| 148 |
best_distance = min([d for d in distances if d is not None])
|
| 149 |
except Exception:
|
| 150 |
best_distance = None
|
| 151 |
-
|
| 152 |
best_combined = None
|
| 153 |
if combined:
|
| 154 |
try:
|
|
@@ -159,7 +126,7 @@ def extract_kb_context(kb_results: Optional[Dict[str, Any]], top_chunks: int = 2
|
|
| 159 |
return {
|
| 160 |
"context": context,
|
| 161 |
"sources": sources,
|
| 162 |
-
"top_hits": [],
|
| 163 |
"context_found": bool(selected),
|
| 164 |
"best_score": best_distance,
|
| 165 |
"best_combined": best_combined,
|
|
@@ -179,13 +146,12 @@ def _build_clarifying_message() -> str:
|
|
| 179 |
"I couldn’t find matching content in the KB yet. To help me narrow it down, please share:\n\n"
|
| 180 |
"• Module/area (e.g., Picking, Receiving, Trailer Close)\n"
|
| 181 |
"• Exact error message text/code (copy-paste)\n"
|
| 182 |
-
"• IDs involved (Order#, Load ID, Shipment#
|
| 183 |
"• Warehouse/site & environment (prod/test)\n"
|
| 184 |
"• When it started and how many users are impacted\n\n"
|
| 185 |
"Reply with these details and I’ll search again."
|
| 186 |
)
|
| 187 |
|
| 188 |
-
# ---------- Intent helpers ----------
|
| 189 |
def _build_tracking_descriptions(issue_text: str, resolved_text: str) -> tuple[str, str]:
|
| 190 |
issue = (issue_text or "").strip()
|
| 191 |
resolved = (resolved_text or "").strip()
|
|
@@ -215,16 +181,10 @@ def _is_feedback_message(msg_norm: str) -> bool:
|
|
| 215 |
return any(p in msg_norm for p in feedback_phrases)
|
| 216 |
|
| 217 |
def _parse_ticket_status_intent(msg_norm: str) -> Dict[str, Optional[str]]:
|
| 218 |
-
status_keywords = [
|
| 219 |
-
"status", "ticket status", "incident status",
|
| 220 |
-
"check status", "check ticket status", "check incident status",
|
| 221 |
-
]
|
| 222 |
if not any(k in msg_norm for k in status_keywords):
|
| 223 |
-
return {}
|
| 224 |
-
patterns = [
|
| 225 |
-
r"(?:incident\s*id|incidentid|ticket\s*number|number)\s*[:=]?\s*(inc\d+)",
|
| 226 |
-
r"(inc\d+)",
|
| 227 |
-
]
|
| 228 |
for pat in patterns:
|
| 229 |
m = re.search(pat, msg_norm, flags=re.IGNORECASE)
|
| 230 |
if m:
|
|
@@ -262,13 +222,8 @@ def _classify_resolution_llm(user_message: str) -> bool:
|
|
| 262 |
resp = requests.post(GEMINI_URL, headers=headers, json=payload, timeout=12, verify=GEMINI_SSL_VERIFY)
|
| 263 |
data = resp.json()
|
| 264 |
text = (
|
| 265 |
-
data.get("candidates", [{}])[0]
|
| 266 |
-
|
| 267 |
-
.get("parts", [{}])[0]
|
| 268 |
-
.get("text", "")
|
| 269 |
-
.strip()
|
| 270 |
-
.lower()
|
| 271 |
-
)
|
| 272 |
return "true" in text
|
| 273 |
except Exception:
|
| 274 |
return False
|
|
@@ -282,82 +237,52 @@ def _is_generic_issue(msg_norm: str) -> bool:
|
|
| 282 |
]
|
| 283 |
return any(p == msg_norm or p in msg_norm for p in generic_phrases) or len(msg_norm.split()) <= 2
|
| 284 |
|
| 285 |
-
# ----------
|
| 286 |
-
STRICT_OVERLAP = 3
|
| 287 |
-
MAX_SENTENCES_STRICT = 4
|
| 288 |
-
MAX_SENTENCES_CONCISE = 3
|
| 289 |
|
| 290 |
def _normalize_for_match(text: str) -> str:
|
| 291 |
t = (text or "").lower()
|
| 292 |
-
t = re.sub(r"[^\w\s]", " ", t)
|
| 293 |
-
t = re.sub(r"\s+", " ", t).strip()
|
| 294 |
return t
|
| 295 |
|
| 296 |
def _split_sentences(ctx: str) -> list[str]:
|
| 297 |
-
# crude sentence split: punctuation/newlines/bullets/dashes
|
| 298 |
raw_sents = re.split(r"(?<=[.!?])\s+|\n+|•\s*|-\s*", ctx or "")
|
| 299 |
return [s.strip() for s in raw_sents if s and len(s.strip()) > 2]
|
| 300 |
|
| 301 |
def _filter_context_for_query(context: str, query: str) -> tuple[str, dict]:
    """Reduce *context* to the sentences most relevant to *query*.

    Returns ``(filtered_text, info)`` where ``filtered_text`` is:
      - exact mode: only sentences with strong term overlap, in document order;
      - concise mode: the first few sentences with some (or no) overlap,
        preserving document order.
    ``info`` carries ``mode`` ('exact' | 'concise'), ``matched_count`` and
    ``all_sentences``.
    """
    def _info(mode: str, matched: int, total: int) -> dict:
        return {'mode': mode, 'matched_count': matched, 'all_sentences': total}

    ctx = (context or "").strip()
    if not ctx or not query:
        return ctx, _info('concise', 0, 0)

    q_terms = [t for t in _normalize_for_match(query).split() if len(t) > 2]
    if not q_terms:
        return ctx, _info('concise', 0, 0)

    sentences = _split_sentences(ctx)
    strong: list[str] = []
    weak: list[str] = []
    for sent in sentences:
        sent_norm = _normalize_for_match(sent)
        # Bullet-style lines get a small score boost.
        bullet_bonus = 1 if re.match(r"^[•\-\*]\s*", sent) else 0
        score = sum(1 for t in q_terms if t in sent_norm) + bullet_bonus
        if score >= STRICT_OVERLAP:
            strong.append(sent)
        elif score > 0:
            weak.append(sent)

    if strong:
        kept = strong[:MAX_SENTENCES_STRICT]
        return "\n".join(kept).strip(), _info('exact', len(kept), len(sentences))

    if weak:
        kept = weak[:MAX_SENTENCES_CONCISE]
        return "\n".join(kept).strip(), _info('concise', len(kept), len(sentences))

    # No overlap at all: fall back to the first few sentences, still concise,
    # preserving original order.
    kept = sentences[:MAX_SENTENCES_CONCISE]
    return "\n".join(kept).strip(), _info('concise', 0, len(sentences))
|
| 354 |
-
|
| 355 |
-
# ---------- NEW: intent-specific line extractors (steps/navigation/errors) ----------
|
| 356 |
|
|
|
|
| 357 |
STEP_LINE_REGEX = re.compile(r"^\s*(?:\d+[\.\)]\s+|[•\-]\s+)", re.IGNORECASE)
|
| 358 |
NAV_LINE_REGEX = re.compile(r"(navigate\s+to|>\s*)", re.IGNORECASE)
|
| 359 |
|
| 360 |
-
# Common imperative verbs across SOPs (add more if you want, optional)
|
| 361 |
PROCEDURE_VERBS = [
|
| 362 |
"log in", "select", "scan", "verify", "confirm", "print",
|
| 363 |
"move", "complete", "click", "open", "navigate", "choose",
|
|
@@ -365,56 +290,55 @@ PROCEDURE_VERBS = [
|
|
| 365 |
]
|
| 366 |
VERB_START_REGEX = re.compile(r"^\s*(?:" + "|".join([re.escape(v) for v in PROCEDURE_VERBS]) + r")\b", re.IGNORECASE)
|
| 367 |
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
"
|
| 371 |
]
|
| 372 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
|
| 374 |
def _is_procedural_line(ln: str) -> bool:
    """Decide whether a line looks like an actionable procedure step.

    A line qualifies when it starts with a number/bullet marker, starts with
    an imperative verb (Log in, Select, Scan, ...), or looks like a
    navigation hint — unless it matches the non-procedural header pattern
    (Purpose/Pre-Requisites/Overview). Bulleted lines are kept only when an
    action verb appears somewhere in them, to avoid pre-requisite bullets.
    """
    line = (ln or "").strip()
    if not line:
        return False

    # Headers like Purpose/Overview are never steps.
    if NON_PROC_REGEX.match(line):
        return False

    # Numbered/bulleted lines.
    if STEP_LINE_REGEX.match(line):
        # Bullets must carry an action verb somewhere to count.
        if line.startswith(("•", "-")):
            return bool(VERB_START_REGEX.search(line))
        return True

    # Imperative-verb starts (covers Word lists where "1." isn't in the text)
    # and navigation lines, even when not numbered.
    return bool(VERB_START_REGEX.match(line) or NAV_LINE_REGEX.search(line))
|
| 405 |
|
| 406 |
-
def _extract_steps_only(text: str, max_lines: int = 12) -> str:
    """Keep only procedural lines, preserving their original order.

    A line is kept when ``_is_procedural_line`` accepts it (numbered/bulleted
    or imperative-verb start), up to *max_lines* lines. When nothing matches
    (rare), the original text is returned unchanged.
    """
    kept: list[str] = []
    for raw in (text or "").splitlines():
        line = raw.strip()
        if line and _is_procedural_line(line):
            kept.append(line)
            if len(kept) >= max_lines:
                break
    return "\n".join(kept).strip() if kept else (text or "").strip()
|
| 419 |
|
| 420 |
def _extract_navigation_only(text: str, max_lines: int = 6) -> str:
|
|
@@ -431,41 +355,24 @@ def _extract_errors_only(text: str, max_lines: int = 10) -> str:
|
|
| 431 |
lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
|
| 432 |
kept = []
|
| 433 |
for ln in lines:
|
| 434 |
-
# Keep error/resolution bullets or imperative fixes (verify, check, etc.)
|
| 435 |
if STEP_LINE_REGEX.match(ln) or ln.lower().startswith(("error", "resolution", "fix", "verify", "check")):
|
| 436 |
kept.append(ln)
|
| 437 |
if len(kept) >= max_lines:
|
| 438 |
break
|
| 439 |
return "\n".join(kept).strip() if kept else (text or "").strip()
|
| 440 |
|
| 441 |
-
# ---------- Health ----------
|
| 442 |
@app.get("/")
async def health_check():
    """Liveness probe: confirm the API is up and responding."""
    return dict(status="ok")
|
| 445 |
|
| 446 |
-
# ---------- Chat endpoint ----------
|
| 447 |
@app.post("/chat")
|
| 448 |
async def chat_with_ai(input_data: ChatInput):
|
| 449 |
-
"""
|
| 450 |
-
Policy:
|
| 451 |
-
A) If 'resolved/working' detected → auto-create tracking incident, auto-mark Resolved, and ask if further help needed.
|
| 452 |
-
B) If user intent is 'create incident/ticket' → SHOW INCIDENT FORM (ask Short & Long description). No Yes/No confirmation here.
|
| 453 |
-
C) Ticket status intent → if number missing, ask for it; else return status from ServiceNow.
|
| 454 |
-
D) Generic issue/openers → ask clarifying details first (no KB yet).
|
| 455 |
-
Feedback-only → first ask clarifying details; after user shares details, try KB again.
|
| 456 |
-
If still no context after clarification → suggest incident ONCE (Yes/No).
|
| 457 |
-
E) Otherwise, use HYBRID search; rewrite answer from KB only; ask resolved if 'OK', or refine if 'PARTIAL'.
|
| 458 |
-
"""
|
| 459 |
try:
|
| 460 |
msg_norm = (input_data.user_message or "").lower().strip()
|
| 461 |
|
| 462 |
-
# -- Handle Yes/No replies from the UI --
|
| 463 |
if msg_norm in ("yes", "y", "sure", "ok", "okay"):
|
| 464 |
return {
|
| 465 |
-
"bot_response": (
|
| 466 |
-
"Great! Tell me what you’d like to do next — check another ticket, "
|
| 467 |
-
"create an incident, or describe your issue."
|
| 468 |
-
),
|
| 469 |
"status": "OK",
|
| 470 |
"followup": "You can say: 'create ticket', 'incident status INC0012345', or describe your problem.",
|
| 471 |
"options": [],
|
|
@@ -481,19 +388,16 @@ async def chat_with_ai(input_data: ChatInput):
|
|
| 481 |
"debug": {"intent": "end_conversation"},
|
| 482 |
}
|
| 483 |
|
| 484 |
-
# -- (A) Resolution acknowledgement --
|
| 485 |
is_llm_resolved = _classify_resolution_llm(input_data.user_message)
|
| 486 |
if _has_negation_resolved(msg_norm):
|
| 487 |
is_llm_resolved = False
|
| 488 |
-
|
| 489 |
if (not _has_negation_resolved(msg_norm)) and (_is_resolution_ack_heuristic(msg_norm) or is_llm_resolved):
|
| 490 |
try:
|
| 491 |
short_desc, long_desc = _build_tracking_descriptions(input_data.last_issue, input_data.user_message)
|
| 492 |
-
result = create_incident(short_desc, long_desc)
|
| 493 |
if isinstance(result, dict) and not result.get("error"):
|
| 494 |
inc_number = result.get("number", "<unknown>")
|
| 495 |
sys_id = result.get("sys_id")
|
| 496 |
-
# Auto-mark resolved (state=6) if we have sys_id
|
| 497 |
resolved_note = ""
|
| 498 |
if sys_id:
|
| 499 |
ok = _set_incident_resolved(sys_id)
|
|
@@ -536,7 +440,6 @@ async def chat_with_ai(input_data: ChatInput):
|
|
| 536 |
"debug": {"intent": "resolved_ack", "exception": True},
|
| 537 |
}
|
| 538 |
|
| 539 |
-
# -- (B) Incident intent --
|
| 540 |
if _is_incident_intent(msg_norm):
|
| 541 |
return {
|
| 542 |
"bot_response": (
|
|
@@ -555,7 +458,6 @@ async def chat_with_ai(input_data: ChatInput):
|
|
| 555 |
"debug": {"intent": "create_ticket"},
|
| 556 |
}
|
| 557 |
|
| 558 |
-
# -- (B.1) Generic issue/open-ended messages → ask details first
|
| 559 |
if _is_generic_issue(msg_norm):
|
| 560 |
return {
|
| 561 |
"bot_response": (
|
|
@@ -576,7 +478,6 @@ async def chat_with_ai(input_data: ChatInput):
|
|
| 576 |
"debug": {"intent": "generic_issue"},
|
| 577 |
}
|
| 578 |
|
| 579 |
-
# -- (C) Ticket status intent --
|
| 580 |
status_intent = _parse_ticket_status_intent(msg_norm)
|
| 581 |
if status_intent:
|
| 582 |
if status_intent.get("ask_number"):
|
|
@@ -587,7 +488,7 @@ async def chat_with_ai(input_data: ChatInput):
|
|
| 587 |
"ask_resolved": False,
|
| 588 |
"suggest_incident": False,
|
| 589 |
"followup": "Provide the Incident ID and I’ll fetch the status.",
|
| 590 |
-
"show_status_form": True,
|
| 591 |
"top_hits": [],
|
| 592 |
"sources": [],
|
| 593 |
"debug": {"intent": "status_request_missing_id"},
|
|
@@ -609,13 +510,9 @@ async def chat_with_ai(input_data: ChatInput):
|
|
| 609 |
short = result.get("short_description", "")
|
| 610 |
num = result.get("number", number or "unknown")
|
| 611 |
return {
|
| 612 |
-
"bot_response": (
|
| 613 |
-
f"**Ticket:** {num} \n"
|
| 614 |
-
f"**Status:** {state_label} \n"
|
| 615 |
-
f"**Issue description:** {short}"
|
| 616 |
-
),
|
| 617 |
"status": "OK",
|
| 618 |
-
"show_assist_card": True,
|
| 619 |
"context_found": False,
|
| 620 |
"ask_resolved": False,
|
| 621 |
"suggest_incident": False,
|
|
@@ -627,56 +524,32 @@ async def chat_with_ai(input_data: ChatInput):
|
|
| 627 |
except Exception as e:
|
| 628 |
raise HTTPException(status_code=500, detail=str(e))
|
| 629 |
|
| 630 |
-
# --
|
| 631 |
-
if _is_feedback_message(msg_norm):
|
| 632 |
-
second_try = (input_data.prev_status or "").upper() == "NO_KB_MATCH"
|
| 633 |
-
return {
|
| 634 |
-
"bot_response": (
|
| 635 |
-
"Understood. To refine the steps, please share:\n"
|
| 636 |
-
"• Exact error text/code\n"
|
| 637 |
-
"• IDs (Order#, Load ID, Shipment#)\n"
|
| 638 |
-
"• Site & environment (prod/test)\n"
|
| 639 |
-
"• When it started and how many users are impacted"
|
| 640 |
-
if not second_try else
|
| 641 |
-
"It still looks unresolved after clarification."
|
| 642 |
-
),
|
| 643 |
-
"status": "NO_KB_MATCH",
|
| 644 |
-
"context_found": False,
|
| 645 |
-
"ask_resolved": False,
|
| 646 |
-
"suggest_incident": bool(second_try), # on second try → show Yes/No card
|
| 647 |
-
"followup": ("Please reply with the above details." if not second_try else "Shall I create a ticket now?"),
|
| 648 |
-
"top_hits": [],
|
| 649 |
-
"sources": [],
|
| 650 |
-
"debug": {"feedback_only": True, "second_try": second_try},
|
| 651 |
-
}
|
| 652 |
-
|
| 653 |
-
# -- (E) HYBRID KB search & rewrite --
|
| 654 |
kb_results = hybrid_search_knowledge_base(input_data.user_message, top_k=10, alpha=0.6, beta=0.4)
|
| 655 |
kb_ctx = extract_kb_context(kb_results, top_chunks=2)
|
| 656 |
context_raw = kb_ctx.get("context", "") or ""
|
| 657 |
|
| 658 |
-
# Filter to exact/concise and always preserve original order of matched sentences
|
| 659 |
filtered_text, filt_info = _filter_context_for_query(context_raw, input_data.user_message)
|
| 660 |
context = filtered_text
|
| 661 |
context_found = bool(kb_ctx.get("context_found", False)) and bool(context.strip())
|
| 662 |
-
best_distance = kb_ctx.get("best_score")
|
| 663 |
-
best_combined = kb_ctx.get("best_combined")
|
| 664 |
detected_intent = kb_results.get("user_intent", "neutral")
|
|
|
|
| 665 |
|
| 666 |
-
#
|
| 667 |
q = (input_data.user_message or "").lower()
|
| 668 |
if detected_intent == "steps" or any(k in q for k in ["steps", "procedure", "perform", "do", "process"]):
|
| 669 |
-
context = _extract_steps_only(context, max_lines=12)
|
| 670 |
elif detected_intent == "errors" or any(k in q for k in ["error", "issue", "fail", "not working", "resolution", "fix"]):
|
| 671 |
context = _extract_errors_only(context, max_lines=10)
|
| 672 |
elif any(k in q for k in ["navigate", "navigation", "menu", "screen"]):
|
| 673 |
context = _extract_navigation_only(context, max_lines=6)
|
| 674 |
-
# else: leave context as-is (concise filter already applied)
|
| 675 |
|
| 676 |
-
#
|
| 677 |
short_query = len((input_data.user_message or "").split()) <= 4
|
| 678 |
-
gate_combined_no_kb = 0.22 if short_query else 0.28
|
| 679 |
-
gate_combined_ok = 0.60 if short_query else 0.55
|
| 680 |
gate_distance_no_kb = 2.0
|
| 681 |
|
| 682 |
if (not context_found or not context.strip()) or (
|
|
@@ -701,26 +574,18 @@ async def chat_with_ai(input_data: ChatInput):
|
|
| 701 |
"debug": {"used_chunks": 0, "second_try": second_try, "best_distance": best_distance, "best_combined": best_combined},
|
| 702 |
}
|
| 703 |
|
| 704 |
-
#
|
| 705 |
-
threshold_ok = gate_combined_ok
|
| 706 |
-
mode_note = (
|
| 707 |
-
"Return ONLY the matched lines from the context in the same order."
|
| 708 |
-
if filt_info.get("mode") == "exact" else
|
| 709 |
-
"Return a short, meaningful snippet strictly based on the context."
|
| 710 |
-
)
|
| 711 |
-
|
| 712 |
enhanced_prompt = (
|
| 713 |
"From the provided context, output only the actionable steps/procedure relevant to the user's question. "
|
| 714 |
"Use ONLY the provided context; do NOT add information that is not present. "
|
| 715 |
-
|
| 716 |
-
"Do NOT include
|
| 717 |
f"### Context\n{context}\n\n"
|
| 718 |
f"### Question\n{input_data.user_message}\n\n"
|
| 719 |
"### Output\n"
|
| 720 |
"- Return numbered/bulleted steps only, in the same order.\n"
|
| 721 |
"- If context is insufficient, add: 'This may be partial based on available KB.'\n"
|
| 722 |
)
|
| 723 |
-
|
| 724 |
headers = {"Content-Type": "application/json"}
|
| 725 |
payload = {"contents": [{"parts": [{"text": enhanced_prompt}]}]}
|
| 726 |
try:
|
|
@@ -734,20 +599,16 @@ async def chat_with_ai(input_data: ChatInput):
|
|
| 734 |
result = {}
|
| 735 |
|
| 736 |
try:
|
| 737 |
-
bot_text = (
|
| 738 |
-
result["candidates"][0]["content"]["parts"][0]["text"]
|
| 739 |
-
if isinstance(result, dict) else ""
|
| 740 |
-
)
|
| 741 |
except Exception:
|
| 742 |
bot_text = ""
|
| 743 |
|
| 744 |
if not bot_text.strip():
|
| 745 |
-
|
| 746 |
-
bot_text = context
|
| 747 |
bot_text = _strip_any_source_lines(bot_text).strip()
|
| 748 |
|
| 749 |
status = "OK" if (
|
| 750 |
-
(best_combined is not None and best_combined >=
|
| 751 |
or (filt_info.get('mode') == 'exact' and filt_info.get('matched_count', 0) > 0)
|
| 752 |
) else "PARTIAL"
|
| 753 |
|
|
@@ -755,17 +616,13 @@ async def chat_with_ai(input_data: ChatInput):
|
|
| 755 |
if ("partial" in lower) or ("may be partial" in lower) or ("closest" in lower) or ("may not fully" in lower):
|
| 756 |
status = "PARTIAL"
|
| 757 |
|
| 758 |
-
ask_resolved = (status == "OK")
|
| 759 |
-
suggest_incident = False
|
| 760 |
-
followup = ("Does this match your scenario? I can refine the steps." if status == "PARTIAL" else None)
|
| 761 |
-
|
| 762 |
return {
|
| 763 |
"bot_response": bot_text,
|
| 764 |
"status": status,
|
| 765 |
"context_found": True,
|
| 766 |
-
"ask_resolved":
|
| 767 |
-
"suggest_incident":
|
| 768 |
-
"followup":
|
| 769 |
"top_hits": [],
|
| 770 |
"sources": [],
|
| 771 |
"debug": {
|
|
@@ -776,6 +633,7 @@ async def chat_with_ai(input_data: ChatInput):
|
|
| 776 |
"filter_mode": filt_info.get("mode"),
|
| 777 |
"matched_count": filt_info.get("matched_count"),
|
| 778 |
"user_intent": detected_intent,
|
|
|
|
| 779 |
},
|
| 780 |
}
|
| 781 |
|
|
@@ -784,16 +642,7 @@ async def chat_with_ai(input_data: ChatInput):
|
|
| 784 |
except Exception as e:
|
| 785 |
raise HTTPException(status_code=500, detail=str(e))
|
| 786 |
|
| 787 |
-
# ---------- Incident endpoints ----------
|
| 788 |
def _set_incident_resolved(sys_id: str) -> bool:
|
| 789 |
-
"""
|
| 790 |
-
Robust resolver:
|
| 791 |
-
A) Try default fields (close_code/close_notes/caller_id) with state=6
|
| 792 |
-
B) If fails, try state="Resolved"
|
| 793 |
-
C) If still fails, try custom field names from env (e.g., u_resolution_code/u_resolution_notes)
|
| 794 |
-
Optional: pre-step to In Progress if SERVICENOW_REQUIRE_IN_PROGRESS_FIRST=true
|
| 795 |
-
Logs a short diagnostic line on failure so we can see the exact reason.
|
| 796 |
-
"""
|
| 797 |
try:
|
| 798 |
token = get_valid_token()
|
| 799 |
instance_url = os.getenv("SERVICENOW_INSTANCE_URL")
|
|
@@ -810,8 +659,8 @@ def _set_incident_resolved(sys_id: str) -> bool:
|
|
| 810 |
close_code_val = os.getenv("SERVICENOW_CLOSE_CODE", "Solution provided")
|
| 811 |
close_notes_val = os.getenv("SERVICENOW_RESOLUTION_NOTES", "Issue resolved, user confirmed")
|
| 812 |
caller_sysid = os.getenv("SERVICENOW_CALLER_SYSID")
|
| 813 |
-
resolved_by_sysid = os.getenv("SERVICENOW_RESOLVED_BY_SYSID")
|
| 814 |
-
assign_group = os.getenv("SERVICENOW_ASSIGNMENT_GROUP_SYSID")
|
| 815 |
require_progress = os.getenv("SERVICENOW_REQUIRE_IN_PROGRESS_FIRST", "false").lower() in ("1", "true", "yes")
|
| 816 |
|
| 817 |
if require_progress:
|
|
@@ -889,8 +738,6 @@ async def raise_incident(input_data: IncidentInput):
|
|
| 889 |
ticket_text = f"Incident created: {inc_number}{resolved_note}"
|
| 890 |
else:
|
| 891 |
ticket_text = "Incident created."
|
| 892 |
-
|
| 893 |
-
# Do NOT include follow-up question inside bot_response to avoid duplication in UI.
|
| 894 |
return {
|
| 895 |
"bot_response": f"✅ {ticket_text}",
|
| 896 |
"debug": "Incident created via ServiceNow",
|
|
@@ -945,10 +792,7 @@ async def incident_status(input_data: TicketStatusInput):
|
|
| 945 |
instance_url = os.getenv("SERVICENOW_INSTANCE_URL")
|
| 946 |
if not instance_url:
|
| 947 |
raise HTTPException(status_code=500, detail="SERVICENOW_INSTANCE_URL missing")
|
| 948 |
-
headers = {
|
| 949 |
-
"Authorization": f"Bearer {token}",
|
| 950 |
-
"Accept": "application/json",
|
| 951 |
-
}
|
| 952 |
if input_data.sys_id:
|
| 953 |
url = f"{instance_url}/api/now/table/incident/{input_data.sys_id}"
|
| 954 |
response = requests.get(url, headers=headers, verify=VERIFY_SSL, timeout=25)
|
|
@@ -962,19 +806,14 @@ async def incident_status(input_data: TicketStatusInput):
|
|
| 962 |
result = (lst or [{}])[0] if response.status_code == 200 else {}
|
| 963 |
else:
|
| 964 |
raise HTTPException(status_code=400, detail="Provide IncidentID (number) or sys_id")
|
| 965 |
-
|
| 966 |
state_code = str(result.get("state", "unknown"))
|
| 967 |
state_label = STATE_MAP.get(state_code, state_code)
|
| 968 |
short = result.get("short_description", "")
|
| 969 |
number = result.get("number", input_data.number or "unknown")
|
| 970 |
return {
|
| 971 |
-
"bot_response": (
|
| 972 |
-
f"**Ticket:** {number} \n"
|
| 973 |
-
f"**Status:** {state_label} \n"
|
| 974 |
-
f"**Issue description:** {short}"
|
| 975 |
-
).replace("\n", " \n"), # hard breaks for ReactMarkdown
|
| 976 |
"followup": "Is there anything else I can assist you with?",
|
| 977 |
-
"show_assist_card": True,
|
| 978 |
"persist": True,
|
| 979 |
"debug": "Incident status fetched",
|
| 980 |
}
|
|
|
|
| 11 |
from dotenv import load_dotenv
|
| 12 |
from datetime import datetime
|
| 13 |
|
|
|
|
| 14 |
from services.kb_creation import (
|
| 15 |
collection,
|
| 16 |
ingest_documents,
|
| 17 |
+
hybrid_search_knowledge_base,
|
| 18 |
)
|
| 19 |
|
| 20 |
+
from services.login import router as login_router
|
| 21 |
+
from services.generate_ticket import get_valid_token, create_incident
|
|
|
|
| 22 |
|
| 23 |
VERIFY_SSL = os.getenv("SERVICENOW_SSL_VERIFY", "true").lower() in ("1", "true", "yes")
|
| 24 |
GEMINI_SSL_VERIFY = os.getenv("GEMINI_SSL_VERIFY", "true").lower() in ("1", "true", "yes")
|
| 25 |
|
|
|
|
| 26 |
load_dotenv()
|
| 27 |
+
os.environ["POSTHOG_DISABLED"] = "true"
|
|
|
|
| 28 |
|
| 29 |
@asynccontextmanager
|
| 30 |
async def lifespan(app: FastAPI):
|
|
|
|
|
|
|
|
|
|
| 31 |
try:
|
| 32 |
folder_path = os.path.join(os.getcwd(), "documents")
|
| 33 |
if collection.count() == 0:
|
| 34 |
print("🔍 KB empty. Running ingestion...")
|
| 35 |
+
ingest_documents(folder_path)
|
| 36 |
else:
|
| 37 |
print(f"✅ KB already populated with {collection.count()} entries. Skipping ingestion.")
|
| 38 |
except Exception as e:
|
| 39 |
print(f"⚠️ KB ingestion failed: {e}")
|
| 40 |
yield
|
| 41 |
|
|
|
|
| 42 |
app = FastAPI(lifespan=lifespan)
|
| 43 |
app.include_router(login_router)
|
| 44 |
|
| 45 |
+
origins = ["https://chatbotnova-chatbot-frontend.hf.space"]
|
|
|
|
|
|
|
|
|
|
| 46 |
app.add_middleware(
|
| 47 |
CORSMiddleware,
|
| 48 |
allow_origins=origins,
|
|
|
|
| 51 |
allow_headers=["*"],
|
| 52 |
)
|
| 53 |
|
|
|
|
| 54 |
class ChatInput(BaseModel):
|
| 55 |
user_message: str
|
| 56 |
+
prev_status: Optional[str] = None
|
| 57 |
last_issue: Optional[str] = None
|
| 58 |
|
| 59 |
class IncidentInput(BaseModel):
|
|
|
|
| 66 |
|
| 67 |
class TicketStatusInput(BaseModel):
|
| 68 |
sys_id: Optional[str] = None
|
| 69 |
+
number: Optional[str] = None
|
| 70 |
|
|
|
|
| 71 |
STATE_MAP = {
|
| 72 |
"1": "New",
|
| 73 |
"2": "In Progress",
|
|
|
|
| 77 |
"8": "Canceled",
|
| 78 |
}
|
| 79 |
|
|
|
|
| 80 |
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
| 81 |
GEMINI_URL = (
|
| 82 |
f"https://generativelanguage.googleapis.com/v1beta/models/"
|
| 83 |
f"gemini-2.5-flash-lite:generateContent?key={GEMINI_API_KEY}"
|
| 84 |
)
|
| 85 |
|
|
|
|
| 86 |
def extract_kb_context(kb_results: Optional[Dict[str, Any]], top_chunks: int = 2) -> Dict[str, Any]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
if not kb_results or not isinstance(kb_results, dict):
|
| 88 |
return {"context": "", "sources": [], "top_hits": [], "context_found": False, "best_score": None, "best_combined": None}
|
|
|
|
| 89 |
documents = kb_results.get("documents") or []
|
| 90 |
metadatas = kb_results.get("metadatas") or []
|
| 91 |
distances = kb_results.get("distances") or []
|
|
|
|
| 92 |
combined = kb_results.get("combined_scores") or []
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
items = []
|
| 95 |
for i, doc in enumerate(documents):
|
| 96 |
text = doc.strip() if isinstance(doc, str) else ""
|
|
|
|
| 110 |
context = "\n\n---\n\n".join([s["text"] for s in selected]) if selected else ""
|
| 111 |
sources = [s["meta"] for s in selected]
|
| 112 |
|
|
|
|
| 113 |
best_distance = None
|
| 114 |
if distances:
|
| 115 |
try:
|
| 116 |
best_distance = min([d for d in distances if d is not None])
|
| 117 |
except Exception:
|
| 118 |
best_distance = None
|
|
|
|
| 119 |
best_combined = None
|
| 120 |
if combined:
|
| 121 |
try:
|
|
|
|
| 126 |
return {
|
| 127 |
"context": context,
|
| 128 |
"sources": sources,
|
| 129 |
+
"top_hits": [],
|
| 130 |
"context_found": bool(selected),
|
| 131 |
"best_score": best_distance,
|
| 132 |
"best_combined": best_combined,
|
|
|
|
| 146 |
"I couldn’t find matching content in the KB yet. To help me narrow it down, please share:\n\n"
|
| 147 |
"• Module/area (e.g., Picking, Receiving, Trailer Close)\n"
|
| 148 |
"• Exact error message text/code (copy-paste)\n"
|
| 149 |
+
"• IDs involved (Order#, Load ID, Shipment#)\n"
|
| 150 |
"• Warehouse/site & environment (prod/test)\n"
|
| 151 |
"• When it started and how many users are impacted\n\n"
|
| 152 |
"Reply with these details and I’ll search again."
|
| 153 |
)
|
| 154 |
|
|
|
|
| 155 |
def _build_tracking_descriptions(issue_text: str, resolved_text: str) -> tuple[str, str]:
|
| 156 |
issue = (issue_text or "").strip()
|
| 157 |
resolved = (resolved_text or "").strip()
|
|
|
|
| 181 |
return any(p in msg_norm for p in feedback_phrases)
|
| 182 |
|
| 183 |
def _parse_ticket_status_intent(msg_norm: str) -> Dict[str, Optional[str]]:
|
| 184 |
+
status_keywords = ["status", "ticket status", "incident status", "check status", "check ticket status", "check incident status"]
|
|
|
|
|
|
|
|
|
|
| 185 |
if not any(k in msg_norm for k in status_keywords):
|
| 186 |
+
return {}
|
| 187 |
+
patterns = [r"(?:incident\s*id|incidentid|ticket\s*number|number)\s*[:=]?\s*(inc\d+)", r"(inc\d+)"]
|
|
|
|
|
|
|
|
|
|
| 188 |
for pat in patterns:
|
| 189 |
m = re.search(pat, msg_norm, flags=re.IGNORECASE)
|
| 190 |
if m:
|
|
|
|
| 222 |
resp = requests.post(GEMINI_URL, headers=headers, json=payload, timeout=12, verify=GEMINI_SSL_VERIFY)
|
| 223 |
data = resp.json()
|
| 224 |
text = (
|
| 225 |
+
data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "")
|
| 226 |
+
).strip().lower()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
return "true" in text
|
| 228 |
except Exception:
|
| 229 |
return False
|
|
|
|
| 237 |
]
|
| 238 |
return any(p == msg_norm or p in msg_norm for p in generic_phrases) or len(msg_norm.split()) <= 2
|
| 239 |
|
| 240 |
+
# ---------- Query-normalized, order-preserving filter ----------
# Minimum query-term overlap for a sentence to be kept as an "exact" match.
STRICT_OVERLAP = 3
# Maximum sentences returned in exact-match mode.
MAX_SENTENCES_STRICT = 4
# Maximum sentences returned in concise (partial/fallback) mode.
MAX_SENTENCES_CONCISE = 3
|
| 244 |
|
| 245 |
def _normalize_for_match(text: str) -> str:
|
| 246 |
t = (text or "").lower()
|
| 247 |
+
t = re.sub(r"[^\w\s]", " ", t)
|
| 248 |
+
t = re.sub(r"\s+", " ", t).strip()
|
| 249 |
return t
|
| 250 |
|
| 251 |
def _split_sentences(ctx: str) -> list[str]:
|
|
|
|
| 252 |
raw_sents = re.split(r"(?<=[.!?])\s+|\n+|•\s*|-\s*", ctx or "")
|
| 253 |
return [s.strip() for s in raw_sents if s and len(s.strip()) > 2]
|
| 254 |
|
| 255 |
def _filter_context_for_query(context: str, query: str) -> tuple[str, dict]:
    """Trim *context* down to the sentences most relevant to *query*.

    Each sentence is scored by how many significant query terms (length > 2)
    it contains, with a +1 bonus for bullet lines.  Sentences reaching
    STRICT_OVERLAP are returned in 'exact' mode; otherwise any partially
    matching sentences are returned in 'concise' mode; if nothing matches,
    the leading sentences are returned as a concise fallback.

    Returns:
        (filtered_text, info) where info carries 'mode', 'matched_count',
        and 'all_sentences' diagnostics.
    """
    ctx = (context or "").strip()

    def _info(mode: str, count: int, total: int) -> dict:
        # Uniform shape for the diagnostics dict returned alongside the text.
        return {'mode': mode, 'matched_count': count, 'all_sentences': total}

    if not ctx or not query:
        return ctx, _info('concise', 0, 0)

    terms = [t for t in _normalize_for_match(query).split() if len(t) > 2]
    if not terms:
        return ctx, _info('concise', 0, 0)

    sentences = _split_sentences(ctx)
    strong: list[str] = []
    weak: list[str] = []
    for sent in sentences:
        sent_norm = _normalize_for_match(sent)
        bullet_bonus = 1 if re.match(r"^[•\-\*]\s*", sent) else 0
        score = sum(1 for t in terms if t in sent_norm) + bullet_bonus
        if score >= STRICT_OVERLAP:
            strong.append(sent)
        elif score > 0:
            weak.append(sent)

    total = len(sentences)
    if strong:
        kept = strong[:MAX_SENTENCES_STRICT]
        return "\n".join(kept).strip(), _info('exact', len(kept), total)
    if weak:
        kept = weak[:MAX_SENTENCES_CONCISE]
        return "\n".join(kept).strip(), _info('concise', len(kept), total)
    kept = sentences[:MAX_SENTENCES_CONCISE]
    return "\n".join(kept).strip(), _info('concise', 0, total)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
|
| 282 |
+
# ---------- intent & action specific extractors ----------
|
| 283 |
STEP_LINE_REGEX = re.compile(r"^\s*(?:\d+[\.\)]\s+|[•\-]\s+)", re.IGNORECASE)
|
| 284 |
NAV_LINE_REGEX = re.compile(r"(navigate\s+to|>\s*)", re.IGNORECASE)
|
| 285 |
|
|
|
|
| 286 |
PROCEDURE_VERBS = [
|
| 287 |
"log in", "select", "scan", "verify", "confirm", "print",
|
| 288 |
"move", "complete", "click", "open", "navigate", "choose",
|
|
|
|
| 290 |
]
|
| 291 |
VERB_START_REGEX = re.compile(r"^\s*(?:" + "|".join([re.escape(v) for v in PROCEDURE_VERBS]) + r")\b", re.IGNORECASE)
|
| 292 |
|
| 293 |
+
# Phrases that mark a line as descriptive/boilerplate rather than an
# actionable procedure step (used to reject lines in _is_procedural_line).
NON_PROC_PHRASES = [
    "to ensure", "as per", "purpose", "pre-requisites", "prerequisites", "overview", "introduction",
    "organized manner", "structured", "help users", "objective"
]
# Case-insensitive regex matching any of the non-procedural phrases above.
NON_PROC_ANY_REGEX = re.compile("|".join([re.escape(v) for v in NON_PROC_PHRASES]), re.IGNORECASE)

# Synonyms for each canonical action keyword; _action_in_line uses these to
# match user-requested actions (e.g. "create") against lines of KB text.
ACTION_SYNS_FLAT = {
    "create": ["create", "creation", "add", "new", "generate"],
    "update": ["update", "modify", "change", "edit"],
    "delete": ["delete", "remove"],
    "navigate": ["navigate", "go to", "open"],
}
|
| 305 |
+
|
| 306 |
+
def _action_in_line(ln: str, target_actions: list[str]) -> bool:
    """Return True if *ln* mentions any synonym of any action in *target_actions*.

    Unknown actions fall back to matching the action word itself.
    """
    haystack = (ln or "").lower()
    return any(
        syn in haystack
        for act in target_actions
        for syn in ACTION_SYNS_FLAT.get(act, [act])
    )
|
| 313 |
|
| 314 |
def _is_procedural_line(ln: str) -> bool:
    """Heuristically decide whether *ln* looks like an actionable procedure step.

    A line qualifies when it is a numbered/bulleted step, starts with a known
    procedure verb, or contains navigation cues — unless it matches one of
    the non-procedural boilerplate phrases, which always disqualifies it.
    """
    line = (ln or "").strip()
    if not line:
        return False
    if NON_PROC_ANY_REGEX.search(line):
        # Boilerplate ("purpose", "overview", ...) is never a step.
        return False
    if STEP_LINE_REGEX.match(line):
        # Bullet lines must also carry a verb or a navigation cue;
        # numbered steps are accepted as-is.
        if line.lstrip().startswith(("•", "-")):
            return bool(VERB_START_REGEX.search(line) or NAV_LINE_REGEX.search(line))
        return True
    return bool(VERB_START_REGEX.match(line) or NAV_LINE_REGEX.search(line))
|
| 329 |
|
| 330 |
+
def _extract_steps_only(text: str, max_lines: int = 12, target_actions: list[str] | None = None) -> str:
    """Keep only the procedural step lines from *text*.

    When *target_actions* is non-empty, a procedural line is kept only if it
    also mentions one of the requested actions.  At most *max_lines* lines
    are returned; if nothing qualifies, the stripped original text is
    returned unchanged as a fallback.
    """
    selected: list[str] = []
    for raw in (text or "").splitlines():
        line = raw.strip()
        if not line or not _is_procedural_line(line):
            continue
        # If a specific action was requested (e.g. "create"), drop lines
        # that do not mention it.
        if target_actions and not _action_in_line(line, target_actions):
            continue
        selected.append(line)
        if len(selected) >= max_lines:
            break
    return "\n".join(selected).strip() if selected else (text or "").strip()
|
| 343 |
|
| 344 |
def _extract_navigation_only(text: str, max_lines: int = 6) -> str:
|
|
|
|
| 355 |
lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
|
| 356 |
kept = []
|
| 357 |
for ln in lines:
|
|
|
|
| 358 |
if STEP_LINE_REGEX.match(ln) or ln.lower().startswith(("error", "resolution", "fix", "verify", "check")):
|
| 359 |
kept.append(ln)
|
| 360 |
if len(kept) >= max_lines:
|
| 361 |
break
|
| 362 |
return "\n".join(kept).strip() if kept else (text or "").strip()
|
| 363 |
|
|
|
|
| 364 |
@app.get("/")
async def health_check():
    """Health-check endpoint: report that the service is running."""
    return {"status": "ok"}
|
| 367 |
|
|
|
|
| 368 |
@app.post("/chat")
|
| 369 |
async def chat_with_ai(input_data: ChatInput):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
try:
|
| 371 |
msg_norm = (input_data.user_message or "").lower().strip()
|
| 372 |
|
|
|
|
| 373 |
if msg_norm in ("yes", "y", "sure", "ok", "okay"):
|
| 374 |
return {
|
| 375 |
+
"bot_response": ("Great! Tell me what you’d like to do next — check another ticket, create an incident, or describe your issue."),
|
|
|
|
|
|
|
|
|
|
| 376 |
"status": "OK",
|
| 377 |
"followup": "You can say: 'create ticket', 'incident status INC0012345', or describe your problem.",
|
| 378 |
"options": [],
|
|
|
|
| 388 |
"debug": {"intent": "end_conversation"},
|
| 389 |
}
|
| 390 |
|
|
|
|
| 391 |
is_llm_resolved = _classify_resolution_llm(input_data.user_message)
|
| 392 |
if _has_negation_resolved(msg_norm):
|
| 393 |
is_llm_resolved = False
|
|
|
|
| 394 |
if (not _has_negation_resolved(msg_norm)) and (_is_resolution_ack_heuristic(msg_norm) or is_llm_resolved):
|
| 395 |
try:
|
| 396 |
short_desc, long_desc = _build_tracking_descriptions(input_data.last_issue, input_data.user_message)
|
| 397 |
+
result = create_incident(short_desc, long_desc)
|
| 398 |
if isinstance(result, dict) and not result.get("error"):
|
| 399 |
inc_number = result.get("number", "<unknown>")
|
| 400 |
sys_id = result.get("sys_id")
|
|
|
|
| 401 |
resolved_note = ""
|
| 402 |
if sys_id:
|
| 403 |
ok = _set_incident_resolved(sys_id)
|
|
|
|
| 440 |
"debug": {"intent": "resolved_ack", "exception": True},
|
| 441 |
}
|
| 442 |
|
|
|
|
| 443 |
if _is_incident_intent(msg_norm):
|
| 444 |
return {
|
| 445 |
"bot_response": (
|
|
|
|
| 458 |
"debug": {"intent": "create_ticket"},
|
| 459 |
}
|
| 460 |
|
|
|
|
| 461 |
if _is_generic_issue(msg_norm):
|
| 462 |
return {
|
| 463 |
"bot_response": (
|
|
|
|
| 478 |
"debug": {"intent": "generic_issue"},
|
| 479 |
}
|
| 480 |
|
|
|
|
| 481 |
status_intent = _parse_ticket_status_intent(msg_norm)
|
| 482 |
if status_intent:
|
| 483 |
if status_intent.get("ask_number"):
|
|
|
|
| 488 |
"ask_resolved": False,
|
| 489 |
"suggest_incident": False,
|
| 490 |
"followup": "Provide the Incident ID and I’ll fetch the status.",
|
| 491 |
+
"show_status_form": True,
|
| 492 |
"top_hits": [],
|
| 493 |
"sources": [],
|
| 494 |
"debug": {"intent": "status_request_missing_id"},
|
|
|
|
| 510 |
short = result.get("short_description", "")
|
| 511 |
num = result.get("number", number or "unknown")
|
| 512 |
return {
|
| 513 |
+
"bot_response": (f"**Ticket:** {num} \n" f"**Status:** {state_label} \n" f"**Issue description:** {short}"),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
"status": "OK",
|
| 515 |
+
"show_assist_card": True,
|
| 516 |
"context_found": False,
|
| 517 |
"ask_resolved": False,
|
| 518 |
"suggest_incident": False,
|
|
|
|
| 524 |
except Exception as e:
|
| 525 |
raise HTTPException(status_code=500, detail=str(e))
|
| 526 |
|
| 527 |
+
# ---- Hybrid KB search ----
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 528 |
kb_results = hybrid_search_knowledge_base(input_data.user_message, top_k=10, alpha=0.6, beta=0.4)
|
| 529 |
kb_ctx = extract_kb_context(kb_results, top_chunks=2)
|
| 530 |
context_raw = kb_ctx.get("context", "") or ""
|
| 531 |
|
|
|
|
| 532 |
filtered_text, filt_info = _filter_context_for_query(context_raw, input_data.user_message)
|
| 533 |
context = filtered_text
|
| 534 |
context_found = bool(kb_ctx.get("context_found", False)) and bool(context.strip())
|
| 535 |
+
best_distance = kb_ctx.get("best_score")
|
| 536 |
+
best_combined = kb_ctx.get("best_combined")
|
| 537 |
detected_intent = kb_results.get("user_intent", "neutral")
|
| 538 |
+
actions = kb_results.get("actions", [])
|
| 539 |
|
| 540 |
+
# Shape context by intent + action
|
| 541 |
q = (input_data.user_message or "").lower()
|
| 542 |
if detected_intent == "steps" or any(k in q for k in ["steps", "procedure", "perform", "do", "process"]):
|
| 543 |
+
context = _extract_steps_only(context, max_lines=12, target_actions=actions)
|
| 544 |
elif detected_intent == "errors" or any(k in q for k in ["error", "issue", "fail", "not working", "resolution", "fix"]):
|
| 545 |
context = _extract_errors_only(context, max_lines=10)
|
| 546 |
elif any(k in q for k in ["navigate", "navigation", "menu", "screen"]):
|
| 547 |
context = _extract_navigation_only(context, max_lines=6)
|
|
|
|
| 548 |
|
| 549 |
+
# Gating
|
| 550 |
short_query = len((input_data.user_message or "").split()) <= 4
|
| 551 |
+
gate_combined_no_kb = 0.22 if short_query else 0.28
|
| 552 |
+
gate_combined_ok = 0.60 if short_query else 0.55
|
| 553 |
gate_distance_no_kb = 2.0
|
| 554 |
|
| 555 |
if (not context_found or not context.strip()) or (
|
|
|
|
| 574 |
"debug": {"used_chunks": 0, "second_try": second_try, "best_distance": best_distance, "best_combined": best_combined},
|
| 575 |
}
|
| 576 |
|
| 577 |
+
# LLM rewrite (optional, may rate-limit)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 578 |
enhanced_prompt = (
|
| 579 |
"From the provided context, output only the actionable steps/procedure relevant to the user's question. "
|
| 580 |
"Use ONLY the provided context; do NOT add information that is not present. "
|
| 581 |
+
("Return ONLY lines containing the requested action verbs." if actions else "")
|
| 582 |
+
+ " Do NOT include document names, section titles, or 'Source:' lines.\n\n"
|
| 583 |
f"### Context\n{context}\n\n"
|
| 584 |
f"### Question\n{input_data.user_message}\n\n"
|
| 585 |
"### Output\n"
|
| 586 |
"- Return numbered/bulleted steps only, in the same order.\n"
|
| 587 |
"- If context is insufficient, add: 'This may be partial based on available KB.'\n"
|
| 588 |
)
|
|
|
|
| 589 |
headers = {"Content-Type": "application/json"}
|
| 590 |
payload = {"contents": [{"parts": [{"text": enhanced_prompt}]}]}
|
| 591 |
try:
|
|
|
|
| 599 |
result = {}
|
| 600 |
|
| 601 |
try:
|
| 602 |
+
bot_text = (result["candidates"][0]["content"]["parts"][0]["text"] if isinstance(result, dict) else "")
|
|
|
|
|
|
|
|
|
|
| 603 |
except Exception:
|
| 604 |
bot_text = ""
|
| 605 |
|
| 606 |
if not bot_text.strip():
|
| 607 |
+
bot_text = context # strict steps-only fallback
|
|
|
|
| 608 |
bot_text = _strip_any_source_lines(bot_text).strip()
|
| 609 |
|
| 610 |
status = "OK" if (
|
| 611 |
+
(best_combined is not None and best_combined >= gate_combined_ok)
|
| 612 |
or (filt_info.get('mode') == 'exact' and filt_info.get('matched_count', 0) > 0)
|
| 613 |
) else "PARTIAL"
|
| 614 |
|
|
|
|
| 616 |
if ("partial" in lower) or ("may be partial" in lower) or ("closest" in lower) or ("may not fully" in lower):
|
| 617 |
status = "PARTIAL"
|
| 618 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 619 |
return {
|
| 620 |
"bot_response": bot_text,
|
| 621 |
"status": status,
|
| 622 |
"context_found": True,
|
| 623 |
+
"ask_resolved": (status == "OK"),
|
| 624 |
+
"suggest_incident": False,
|
| 625 |
+
"followup": ("Does this match your scenario? I can refine the steps." if status == "PARTIAL" else None),
|
| 626 |
"top_hits": [],
|
| 627 |
"sources": [],
|
| 628 |
"debug": {
|
|
|
|
| 633 |
"filter_mode": filt_info.get("mode"),
|
| 634 |
"matched_count": filt_info.get("matched_count"),
|
| 635 |
"user_intent": detected_intent,
|
| 636 |
+
"actions": actions,
|
| 637 |
},
|
| 638 |
}
|
| 639 |
|
|
|
|
| 642 |
except Exception as e:
|
| 643 |
raise HTTPException(status_code=500, detail=str(e))
|
| 644 |
|
|
|
|
| 645 |
def _set_incident_resolved(sys_id: str) -> bool:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 646 |
try:
|
| 647 |
token = get_valid_token()
|
| 648 |
instance_url = os.getenv("SERVICENOW_INSTANCE_URL")
|
|
|
|
| 659 |
close_code_val = os.getenv("SERVICENOW_CLOSE_CODE", "Solution provided")
|
| 660 |
close_notes_val = os.getenv("SERVICENOW_RESOLUTION_NOTES", "Issue resolved, user confirmed")
|
| 661 |
caller_sysid = os.getenv("SERVICENOW_CALLER_SYSID")
|
| 662 |
+
resolved_by_sysid = os.getenv("SERVICENOW_RESOLVED_BY_SYSID")
|
| 663 |
+
assign_group = os.getenv("SERVICENOW_ASSIGNMENT_GROUP_SYSID")
|
| 664 |
require_progress = os.getenv("SERVICENOW_REQUIRE_IN_PROGRESS_FIRST", "false").lower() in ("1", "true", "yes")
|
| 665 |
|
| 666 |
if require_progress:
|
|
|
|
| 738 |
ticket_text = f"Incident created: {inc_number}{resolved_note}"
|
| 739 |
else:
|
| 740 |
ticket_text = "Incident created."
|
|
|
|
|
|
|
| 741 |
return {
|
| 742 |
"bot_response": f"✅ {ticket_text}",
|
| 743 |
"debug": "Incident created via ServiceNow",
|
|
|
|
| 792 |
instance_url = os.getenv("SERVICENOW_INSTANCE_URL")
|
| 793 |
if not instance_url:
|
| 794 |
raise HTTPException(status_code=500, detail="SERVICENOW_INSTANCE_URL missing")
|
| 795 |
+
headers = {"Authorization": f"Bearer {token}", "Accept": "application/json"}
|
|
|
|
|
|
|
|
|
|
| 796 |
if input_data.sys_id:
|
| 797 |
url = f"{instance_url}/api/now/table/incident/{input_data.sys_id}"
|
| 798 |
response = requests.get(url, headers=headers, verify=VERIFY_SSL, timeout=25)
|
|
|
|
| 806 |
result = (lst or [{}])[0] if response.status_code == 200 else {}
|
| 807 |
else:
|
| 808 |
raise HTTPException(status_code=400, detail="Provide IncidentID (number) or sys_id")
|
|
|
|
| 809 |
state_code = str(result.get("state", "unknown"))
|
| 810 |
state_label = STATE_MAP.get(state_code, state_code)
|
| 811 |
short = result.get("short_description", "")
|
| 812 |
number = result.get("number", input_data.number or "unknown")
|
| 813 |
return {
|
| 814 |
+
"bot_response": (f"**Ticket:** {number} \n" f"**Status:** {state_label} \n" f"**Issue description:** {short}").replace("\n", " \n"),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 815 |
"followup": "Is there anything else I can assist you with?",
|
| 816 |
+
"show_assist_card": True,
|
| 817 |
"persist": True,
|
| 818 |
"debug": "Incident status fetched",
|
| 819 |
}
|