srilakshu012456 committed on
Commit
960dcf6
·
verified ·
1 Parent(s): 76caf03

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +211 -299
main.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import json
3
  import re
@@ -10,21 +11,20 @@ from fastapi.middleware.cors import CORSMiddleware
10
  from pydantic import BaseModel
11
  from dotenv import load_dotenv
12
  from datetime import datetime
13
-
14
  from services.kb_creation import (
15
  collection,
16
  ingest_documents,
17
  hybrid_search_knowledge_base,
18
- get_section_text, # NEW
19
- get_best_steps_section_text, # NEW
20
  )
21
-
22
  from services.login import router as login_router
23
  from services.generate_ticket import get_valid_token, create_incident
24
 
25
  VERIFY_SSL = os.getenv("SERVICENOW_SSL_VERIFY", "true").lower() in ("1", "true", "yes")
26
  GEMINI_SSL_VERIFY = os.getenv("GEMINI_SSL_VERIFY", "true").lower() in ("1", "true", "yes")
27
 
 
28
  def safe_str(e: Any) -> str:
29
  try:
30
  return builtins.str(e)
@@ -39,12 +39,12 @@ async def lifespan(app: FastAPI):
39
  try:
40
  folder_path = os.path.join(os.getcwd(), "documents")
41
  if collection.count() == 0:
42
- print("🔍 KB empty. Running ingestion...")
43
  ingest_documents(folder_path)
44
  else:
45
- print(f"KB already populated with {collection.count()} entries. Skipping ingestion.")
46
  except Exception as e:
47
- print(f"⚠️ KB ingestion failed: {safe_str(e)}")
48
  yield
49
 
50
  app = FastAPI(lifespan=lifespan)
@@ -91,76 +91,58 @@ GEMINI_URL = (
91
  f"gemini-2.5-flash-lite:generateContent?key={GEMINI_API_KEY}"
92
  )
93
 
94
- def extract_kb_context(kb_results: Optional[Dict[str, Any]], top_chunks: int = 2) -> Dict[str, Any]:
95
- if not kb_results or not isinstance(kb_results, dict):
96
- return {"context": "", "sources": [], "top_hits": [], "context_found": False, "best_score": None, "best_combined": None}
97
-
98
- documents = kb_results.get("documents") or []
99
- metadatas = kb_results.get("metadatas") or []
100
- distances = kb_results.get("distances") or []
101
- combined = kb_results.get("combined_scores") or []
102
-
103
- items = []
104
- for i, doc in enumerate(documents):
105
- text = doc.strip() if isinstance(doc, str) else ""
106
- if not text:
107
- continue
108
- meta = metadatas[i] if i < len(metadatas) and isinstance(metadatas[i], dict) else {}
109
- score = distances[i] if i < len(distances) else None
110
- comb = combined[i] if i < len(combined) else None
111
- m = dict(meta)
112
- if score is not None:
113
- m["distance"] = score
114
- if comb is not None:
115
- m["combined"] = comb
116
- items.append({"text": text, "meta": m})
117
-
118
- selected = items[:max(1, top_chunks)]
119
- context = "\n\n---\n\n".join([s["text"] for s in selected]) if selected else ""
120
- sources = [s["meta"] for s in selected]
121
-
122
- best_distance = None
123
- if distances:
124
- try:
125
- best_distance = min([d for d in distances if d is not None])
126
- except Exception:
127
- best_distance = None
128
- best_combined = None
129
- if combined:
130
- try:
131
- best_combined = max([c for c in combined if c is not None])
132
- except Exception:
133
- best_combined = None
134
-
135
- return {
136
- "context": context,
137
- "sources": sources,
138
- "top_hits": [],
139
- "context_found": bool(selected),
140
- "best_score": best_distance,
141
- "best_combined": best_combined,
142
- }
143
-
144
- def _strip_any_source_lines(text: str) -> str:
145
- lines = text.splitlines()
146
- kept = []
147
- for ln in lines:
148
- if re.match(r"^\s*source\s*:", ln, flags=re.IGNORECASE):
149
  continue
150
- kept.append(ln)
151
- return "\n".join(kept).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
 
153
  def _build_clarifying_message() -> str:
154
  return (
155
- "I couldn’t find matching content in the KB yet. To help me narrow it down, please share:\n\n"
156
- "• Module/area (e.g., Picking, Receiving, Trailer Close)\n"
157
- "• Exact error message text/code (copy-paste)\n"
158
- "• IDs involved (Order#, Load ID, Shipment#)\n"
159
- "• Warehouse/site & environment (prod/test)\n"
160
- "• When it started and how many users are impacted\n\n"
161
- "Reply with these details and I’ll search again."
 
 
 
 
 
 
 
 
162
  )
163
 
 
164
  def _build_tracking_descriptions(issue_text: str, resolved_text: str) -> Tuple[str, str]:
165
  issue = (issue_text or "").strip()
166
  resolved = (resolved_text or "").strip()
@@ -181,14 +163,6 @@ def _is_incident_intent(msg_norm: str) -> bool:
181
  ]
182
  return any(p in msg_norm for p in intent_phrases)
183
 
184
- def _is_feedback_message(msg_norm: str) -> bool:
185
- feedback_phrases = [
186
- "issue not resolved", "not resolved", "still not working",
187
- "same issue", "no change", "didn't work", "doesn't work",
188
- "not fixed", "still failing", "failed again",
189
- ]
190
- return any(p in msg_norm for p in feedback_phrases)
191
-
192
  def _parse_ticket_status_intent(msg_norm: str) -> Dict[str, Optional[str]]:
193
  status_keywords = ["status", "ticket status", "incident status", "check status", "check ticket status", "check incident status"]
194
  if not any(k in msg_norm for k in status_keywords):
@@ -221,8 +195,11 @@ def _classify_resolution_llm(user_message: str) -> bool:
221
  if not GEMINI_API_KEY:
222
  return False
223
  prompt = (
224
- "Classify if the following user message indicates that the issue is resolved or working now.\n"
225
- "Return only 'true' or 'false'.\n\n"
 
 
 
226
  f"Message: {user_message}"
227
  )
228
  headers = {"Content-Type": "application/json"}
@@ -240,16 +217,7 @@ def _classify_resolution_llm(user_message: str) -> bool:
240
  except Exception:
241
  return False
242
 
243
- def _is_generic_issue(msg_norm: str) -> bool:
244
- generic_phrases = [
245
- "issue", "have an issue", "having an issue", "got an issue",
246
- "problem", "have a problem", "help", "need help", "support",
247
- "need support", "please help", "need assistance", "assist me",
248
- "facing issue", "facing a problem", "got a problem"
249
- ]
250
- return any(p == msg_norm or p in msg_norm for p in generic_phrases) or len(msg_norm.split()) <= 2
251
-
252
- # ---------- Query-normalized, order-preserving filter ----------
253
  STRICT_OVERLAP = 3
254
  MAX_SENTENCES_STRICT = 4
255
  MAX_SENTENCES_CONCISE = 3
@@ -261,7 +229,8 @@ def _normalize_for_match(text: str) -> str:
261
  return t
262
 
263
  def _split_sentences(ctx: str) -> List[str]:
264
- raw_sents = re.split(r"(?<=[.!?])\s+|\n+|•\s*|-\s*", ctx or "")
 
265
  return [s.strip() for s in raw_sents if s and len(s.strip()) > 2]
266
 
267
  def _filter_context_for_query(context: str, query: str) -> Tuple[str, Dict[str, Any]]:
@@ -284,109 +253,49 @@ def _filter_context_for_query(context: str, query: str) -> Tuple[str, Dict[str,
284
  matched_any.append(s)
285
  if matched_exact:
286
  kept = matched_exact[:MAX_SENTENCES_STRICT]
287
- return "\n".join(kept).strip(), {'mode': 'exact', 'matched_count': len(kept), 'all_sentences': len(sentences)}
 
288
  if matched_any:
289
  kept = matched_any[:MAX_SENTENCES_CONCISE]
290
- return "\n".join(kept).strip(), {'mode': 'concise', 'matched_count': len(kept), 'all_sentences': len(sentences)}
 
291
  kept = sentences[:MAX_SENTENCES_CONCISE]
292
- return "\n".join(kept).strip(), {'mode': 'concise', 'matched_count': 0, 'all_sentences': len(sentences)}
293
-
294
- # ---------- intent & action specific extractors ----------
295
- STEP_LINE_REGEX = re.compile(r"^\s*(?:\d+[\.\)]\s+|[•\-]\s+)", re.IGNORECASE)
296
- NAV_LINE_REGEX = re.compile(r"(navigate\s+to|>\s*)", re.IGNORECASE)
297
-
298
- PROCEDURE_VERBS = [
299
- "log in", "select", "scan", "verify", "confirm", "print",
300
- "move", "complete", "click", "open", "navigate", "choose",
301
- "enter", "update", "save", "delete", "create", "attach", "assign"
302
- ]
303
- VERB_START_REGEX = re.compile(r"^\s*(?:" + "|".join([re.escape(v) for v in PROCEDURE_VERBS]) + r")\b", re.IGNORECASE)
304
-
305
- NON_PROC_PHRASES = [
306
- "to ensure", "as per", "purpose", "pre-requisites", "prerequisites", "overview", "introduction",
307
- "organized manner", "structured", "help users", "objective"
308
- ]
309
- NON_PROC_ANY_REGEX = re.compile("|".join([re.escape(v) for v in NON_PROC_PHRASES]), re.IGNORECASE)
310
-
311
- ACTION_SYNS_FLAT = {
312
- "create": ["create", "creation", "add", "new", "generate"],
313
- "update": ["update", "modify", "change", "edit"],
314
- "delete": ["delete", "remove"],
315
- "navigate": ["navigate", "go to", "open"],
316
- }
317
 
318
- def _action_in_line(ln: str, target_actions: List[str]) -> bool:
319
- s = (ln or "").lower()
320
- for act in target_actions:
321
- for syn in ACTION_SYNS_FLAT.get(act, [act]):
322
- if syn in s:
323
- return True
324
- return False
325
-
326
- def _is_procedural_line(ln: str) -> bool:
327
- s = (ln or "").strip()
328
- if not s:
329
- return False
330
- if NON_PROC_ANY_REGEX.search(s):
331
- return False
332
- if STEP_LINE_REGEX.match(s):
333
- if s.lstrip().startswith(("•", "-")):
334
- return bool(VERB_START_REGEX.search(s) or NAV_LINE_REGEX.search(s))
335
- return True
336
- if VERB_START_REGEX.match(s):
337
- return True
338
- if NAV_LINE_REGEX.search(s):
339
- return True
340
- return False
341
-
342
- def _extract_steps_only(text: str, max_lines: Optional[int] = 12, target_actions: Optional[List[str]] = None) -> str:
343
- lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
344
- kept = []
345
- for ln in lines:
346
- if _is_procedural_line(ln):
347
- if target_actions:
348
- if not _action_in_line(ln, target_actions):
349
- continue
350
- kept.append(ln)
351
- if max_lines is not None and len(kept) >= max_lines:
352
- break
353
- return "\n".join(kept).strip() if kept else (text or "").strip()
354
 
355
  def _extract_navigation_only(text: str, max_lines: int = 6) -> str:
356
  lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
357
- kept = []
358
  for ln in lines:
359
- if NAV_LINE_REGEX.search(ln) or ln.lower().startswith("log in"):
360
  kept.append(ln)
361
- if len(kept) >= max_lines:
362
- break
363
- return "\n".join(kept).strip() if kept else (text or "").strip()
 
 
 
 
 
 
 
 
364
 
365
- def _extract_errors_only(text: str, max_lines: int = 10) -> str:
366
  lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
367
- kept = []
368
  for ln in lines:
369
- if STEP_LINE_REGEX.match(ln) or ln.lower().startswith(("error", "resolution", "fix", "verify", "check")):
 
370
  kept.append(ln)
371
- if len(kept) >= max_lines:
372
- break
373
- return "\n".join(kept).strip() if kept else (text or "").strip()
 
374
 
375
- def _format_steps_markdown(lines: List[str]) -> str:
376
- """
377
- Convert a list of step lines into a clean Markdown numbered list.
378
- Keeps original order, trims whitespace, skips empty lines.
379
- """
380
- items = []
381
- for i, ln in enumerate(lines, start=1):
382
- s = (ln or "").strip()
383
- if not s:
384
- continue
385
- # If the line already has leading "1. " or "• ", strip it so numbering is consistent
386
- s = re.sub(r"^\s*(?:\d+[\.\)]\s+|[•\-]\s+)", "", s).strip()
387
- items.append(f"{i}. {s}")
388
- return "\n".join(items).strip()
389
-
390
  @app.get("/")
391
  async def health_check():
392
  return {"status": "ok"}
@@ -407,7 +316,7 @@ async def chat_with_ai(input_data: ChatInput):
407
  }
408
  if msg_norm in ("no", "no thanks", "nope"):
409
  return {
410
- "bot_response": "Glad I could help! 👋 If you need anything else later, just let me know.",
411
  "status": "OK",
412
  "end_chat": True,
413
  "followup": None,
@@ -415,7 +324,7 @@ async def chat_with_ai(input_data: ChatInput):
415
  "debug": {"intent": "end_conversation"},
416
  }
417
 
418
- # --- Resolution ack ---
419
  is_llm_resolved = _classify_resolution_llm(input_data.user_message)
420
  if _has_negation_resolved(msg_norm):
421
  is_llm_resolved = False
@@ -472,8 +381,12 @@ async def chat_with_ai(input_data: ChatInput):
472
  if _is_incident_intent(msg_norm):
473
  return {
474
  "bot_response": (
475
- "Okay, let’s create a ServiceNow incident.\n\n"
476
- "Please provide:\n• Short Description (one line)\n"
 
 
 
 
477
  "• Detailed Description (steps, error text, IDs, site, environment)"
478
  ),
479
  "status": (input_data.prev_status or "PARTIAL"),
@@ -488,21 +401,14 @@ async def chat_with_ai(input_data: ChatInput):
488
  }
489
 
490
  # --- Generic opener ---
491
- if _is_generic_issue(msg_norm):
492
  return {
493
- "bot_response": (
494
- "Sure, I can help. Please describe your issue:\n"
495
- "• Module/area (e.g., Picking, Receiving, Trailer Close)\n"
496
- "• Exact error message text/code (copy-paste)\n"
497
- "• IDs involved (Order#, Load ID, Shipment#)\n"
498
- "• Warehouse/site & environment (prod/test)\n"
499
- "• When it started and how many users are impacted"
500
- ),
501
  "status": "NO_KB_MATCH",
502
  "context_found": False,
503
  "ask_resolved": False,
504
- "suggest_incident": False,
505
- "followup": "Please reply with the above details.",
506
  "top_hits": [],
507
  "sources": [],
508
  "debug": {"intent": "generic_issue"},
@@ -542,8 +448,10 @@ async def chat_with_ai(input_data: ChatInput):
542
  num = result.get("number", number or "unknown")
543
  return {
544
  "bot_response": (
545
- f"**Ticket:** {num} \n"
546
- f"**Status:** {state_label} \n"
 
 
547
  f"**Issue description:** {short}"
548
  ),
549
  "status": "OK",
@@ -561,48 +469,67 @@ async def chat_with_ai(input_data: ChatInput):
561
 
562
  # --- Hybrid KB search ---
563
  kb_results = hybrid_search_knowledge_base(input_data.user_message, top_k=10, alpha=0.6, beta=0.4)
564
- kb_ctx = extract_kb_context(kb_results, top_chunks=2)
565
- context_raw = kb_ctx.get("context", "") or ""
566
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
567
  filtered_text, filt_info = _filter_context_for_query(context_raw, input_data.user_message)
568
  context = filtered_text
569
- context_found = bool(kb_ctx.get("context_found", False)) and bool(context.strip())
570
- best_distance = kb_ctx.get("best_score")
571
- best_combined = kb_ctx.get("best_combined")
572
  detected_intent = kb_results.get("user_intent", "neutral")
573
  actions = kb_results.get("actions", [])
574
  best_doc = kb_results.get("best_doc")
575
- top_meta = (kb_results.get("metadatas") or [{}])[0] if (kb_results.get("metadatas") or []) else {}
576
 
577
- # --- FULL SECTION when strongly found & steps intent ---
578
  if detected_intent == "steps" and best_doc:
579
- # prefer full 'Process Steps' section from the best SOP
580
  full_steps = get_best_steps_section_text(best_doc)
581
  if not full_steps:
582
- # fallback: full text of the top section
583
  sec = (top_meta or {}).get("section")
584
  if sec:
585
  full_steps = get_section_text(best_doc, sec)
586
  if full_steps:
587
- # If confidence is high, return ALL procedural lines (no truncation)
588
- high_conf = (best_combined is not None and best_combined >= 0.75)
589
- context = _extract_steps_only(full_steps, max_lines=None if high_conf else 20, target_actions=actions)
590
 
591
- # Intent-shaping (secondary; if not already handled above)
592
  q = (input_data.user_message or "").lower()
593
- if detected_intent == "steps" or any(k in q for k in ["steps", "procedure", "perform", "do", "process"]):
594
- context = _extract_steps_only(context, max_lines=None if (best_combined and best_combined >= 0.75) else 12, target_actions=actions)
595
- elif detected_intent == "errors" or any(k in q for k in ["error", "issue", "fail", "not working", "resolution", "fix"]):
596
- context = _extract_errors_only(context, max_lines=10)
 
597
  elif any(k in q for k in ["navigate", "navigation", "menu", "screen"]):
598
  context = _extract_navigation_only(context, max_lines=6)
 
 
599
 
600
  # Gating
601
  short_query = len((input_data.user_message or "").split()) <= 4
602
  gate_combined_no_kb = 0.22 if short_query else 0.28
603
  gate_combined_ok = 0.60 if short_query else 0.55
604
  gate_distance_no_kb = 2.0
605
-
606
  if (not context_found or not context.strip()) or (
607
  (best_combined is None or best_combined < gate_combined_no_kb)
608
  and (best_distance is None or best_distance >= gate_distance_no_kb)
@@ -618,29 +545,38 @@ async def chat_with_ai(input_data: ChatInput):
618
  "status": "NO_KB_MATCH",
619
  "context_found": False,
620
  "ask_resolved": False,
621
- "suggest_incident": bool(second_try),
622
- "followup": ("Please reply with the above details." if not second_try else "Shall I create a ticket now?"),
623
  "top_hits": [],
624
  "sources": [],
625
  "debug": {"used_chunks": 0, "second_try": second_try, "best_distance": best_distance, "best_combined": best_combined},
626
  }
627
 
628
- # LLM rewrite (kept, but we still fallback cleanly)
629
  enhanced_prompt = (
630
- "From the provided context, output only the actionable steps/procedure relevant to the user's question. "
631
- "Use ONLY the provided context; do NOT add information that is not present. "
632
- + ("Return ONLY lines containing the requested action verbs. " if actions else "")
633
- + "Do NOT include document names, section titles, or 'Source:' lines.\n\n"
634
- f"### Context\n{context}\n\n"
635
- f"### Question\n{input_data.user_message}\n\n"
636
- "### Output\n"
637
- "- Return numbered/bulleted steps in the same order.\n"
638
- "- If context is insufficient, add: 'This may be partial based on available KB.'\n"
 
 
 
 
 
 
 
 
 
639
  )
640
  headers = {"Content-Type": "application/json"}
641
  payload = {"contents": [{"parts": [{"text": enhanced_prompt}]}]}
642
  try:
643
- resp = requests.post(GEMINI_URL, headers=headers, json=payload, timeout=25, verify=VERIFY_SSL)
644
  try:
645
  result = resp.json()
646
  except Exception:
@@ -648,42 +584,17 @@ async def chat_with_ai(input_data: ChatInput):
648
  except Exception:
649
  resp = type("RespStub", (), {"status_code": 0})()
650
  result = {}
651
-
652
  try:
653
- bot_text = (result["candidates"][0]["content"]["parts"][0]["text"] if isinstance(result, dict) else "")
654
  except Exception:
655
  bot_text = ""
656
-
657
  if not bot_text.strip():
658
  bot_text = context
659
- bot_text = _strip_any_source_lines(bot_text).strip()
660
-
661
- # If the intent is steps, render lines as a numbered Markdown list
662
- if kb_results.get("user_intent", "neutral") == "steps":
663
- raw_lines = [ln.strip() for ln in bot_text.splitlines() if ln.strip()]
664
-
665
- # If everything is on a single line, split defensively on ". "
666
- if len(raw_lines) == 1:
667
- parts = [p.strip() for p in re.split(r"\.\s+(?=[A-Z0-9])", raw_lines[0]) if p.strip()]
668
- raw_lines = parts if len(parts) > 1 else raw_lines
669
-
670
- # 🔴 NEW: merge number-only lines with the next line
671
- merged: list[str] = []
672
- i = 0
673
- while i < len(raw_lines):
674
- curr = raw_lines[i]
675
- # A number-only line (e.g., "1", "2", "3")
676
- if re.fullmatch(r"\d+", curr) and (i + 1) < len(raw_lines):
677
- nxt = raw_lines[i + 1].strip()
678
- # Combine into one line: "1. <next line text>"
679
- merged.append(f"{curr}. {nxt}")
680
- i += 2 # skip the next line; already merged
681
- else:
682
- merged.append(curr)
683
- i += 1
684
-
685
- # Finally: normalize and render as Markdown numbered list
686
- bot_text = _format_steps_markdown(merged)
687
 
688
  status = "OK" if (
689
  (best_combined is not None and best_combined >= gate_combined_ok)
@@ -699,12 +610,16 @@ async def chat_with_ai(input_data: ChatInput):
699
  "status": status,
700
  "context_found": True,
701
  "ask_resolved": (status == "OK"),
702
- "suggest_incident": False,
703
- "followup": ("Does this match your scenario? I can refine the steps." if status == "PARTIAL" else None),
704
  "top_hits": [],
705
  "sources": [],
706
  "debug": {
707
- "used_chunks": len(context.split("\n\n---\n\n")) if context else 0,
 
 
 
 
708
  "best_distance": best_distance,
709
  "best_combined": best_combined,
710
  "http_status": getattr(resp, "status_code", 0),
@@ -721,6 +636,7 @@ async def chat_with_ai(input_data: ChatInput):
721
  except Exception as e:
722
  raise HTTPException(status_code=500, detail=safe_str(e))
723
 
 
724
  def _set_incident_resolved(sys_id: str) -> bool:
725
  try:
726
  token = get_valid_token()
@@ -734,14 +650,12 @@ def _set_incident_resolved(sys_id: str) -> bool:
734
  "Content-Type": "application/json",
735
  }
736
  url = f"{instance_url}/api/now/table/incident/{sys_id}"
737
-
738
  close_code_val = os.getenv("SERVICENOW_CLOSE_CODE", "Solution provided")
739
  close_notes_val = os.getenv("SERVICENOW_RESOLUTION_NOTES", "Issue resolved, user confirmed")
740
  caller_sysid = os.getenv("SERVICENOW_CALLER_SYSID")
741
  resolved_by_sysid = os.getenv("SERVICENOW_RESOLVED_BY_SYSID")
742
  assign_group = os.getenv("SERVICENOW_ASSIGNMENT_GROUP_SYSID")
743
  require_progress = os.getenv("SERVICENOW_REQUIRE_IN_PROGRESS_FIRST", "false").lower() in ("1", "true", "yes")
744
-
745
  if require_progress:
746
  try:
747
  resp1 = requests.patch(url, headers=headers, json={"state": "2"}, verify=VERIFY_SSL, timeout=25)
@@ -814,16 +728,16 @@ async def raise_incident(input_data: IncidentInput):
814
  if bool(input_data.mark_resolved) and sys_id not in ("<unknown>", None):
815
  ok = _set_incident_resolved(sys_id)
816
  resolved_note = " (marked Resolved)" if ok else " (could not mark Resolved; please update manually)"
817
- ticket_text = f"Incident created: {inc_number}{resolved_note}"
 
 
 
 
 
 
 
818
  else:
819
- ticket_text = "Incident created."
820
- return {
821
- "bot_response": f"✅ {ticket_text}",
822
- "debug": "Incident created via ServiceNow",
823
- "persist": True,
824
- "show_assist_card": True,
825
- "followup": "Is there anything else I can assist you with?",
826
- }
827
  except Exception as e:
828
  raise HTTPException(status_code=500, detail=safe_str(e))
829
 
@@ -831,17 +745,23 @@ async def raise_incident(input_data: IncidentInput):
831
  async def generate_ticket_desc_ep(input_data: TicketDescInput):
832
  try:
833
  prompt = (
834
- f"You are helping generate ServiceNow ticket descriptions based on the issue: {input_data.issue}.\n"
835
- "Please return the output strictly in JSON format with the following keys:\n"
836
- "{\n"
837
- ' "ShortDescription": "A concise summary of the issue (max 100 characters)",\n'
838
- ' "DetailedDescription": "A detailed explanation of the issue"\n'
839
- "}\n"
 
 
 
 
 
 
840
  "Do not include any extra text, comments, or explanations outside the JSON."
841
  )
842
  headers = {"Content-Type": "application/json"}
843
  payload = {"contents": [{"parts": [{"text": prompt}]}]}
844
- resp = requests.post(GEMINI_URL, headers=headers, json=payload, timeout=25, verify=GEMINI_GSL_VERIFY if 'GEMINI_GSL_VERIFY' in globals() else GEMINI_SSL_VERIFY)
845
  try:
846
  data = resp.json()
847
  except Exception:
@@ -852,7 +772,8 @@ async def generate_ticket_desc_ep(input_data: TicketDescInput):
852
  return {"ShortDescription": "", "DetailedDescription": "", "error": "Gemini parsing failed"}
853
  if text.startswith("```"):
854
  lines = [ln for ln in text.splitlines() if not ln.strip().startswith("```")]
855
- text = "\n".join(lines).strip()
 
856
  try:
857
  ticket_json = json.loads(text)
858
  return {
@@ -891,10 +812,14 @@ async def incident_status(input_data: TicketStatusInput):
891
  number = result.get("number", input_data.number or "unknown")
892
  return {
893
  "bot_response": (
894
- f"**Ticket:** {number} \n"
895
- f"**Status:** {state_label} \n"
 
 
896
  f"**Issue description:** {short}"
897
- ).replace("\n", " \n"),
 
 
898
  "followup": "Is there anything else I can assist you with?",
899
  "show_assist_card": True,
900
  "persist": True,
@@ -902,16 +827,3 @@ async def incident_status(input_data: TicketStatusInput):
902
  }
903
  except Exception as e:
904
  raise HTTPException(status_code=500, detail=safe_str(e))
905
-
906
- # ---- Admin endpoints (optional) ----
907
- @app.get("/kb/info")
908
- async def kb_info():
909
- from services.kb_creation import get_kb_runtime_info
910
- return get_kb_runtime_info()
911
-
912
- @app.post("/kb/reset")
913
- async def kb_reset():
914
- from services.kb_creation import reset_kb
915
- folder_path = os.path.join(os.getcwd(), "documents")
916
- return reset_kb(folder_path)
917
-
 
1
+
2
  import os
3
  import json
4
  import re
 
11
  from pydantic import BaseModel
12
  from dotenv import load_dotenv
13
  from datetime import datetime
 
14
  from services.kb_creation import (
15
  collection,
16
  ingest_documents,
17
  hybrid_search_knowledge_base,
18
+ get_section_text,
19
+ get_best_steps_section_text,
20
  )
 
21
  from services.login import router as login_router
22
  from services.generate_ticket import get_valid_token, create_incident
23
 
24
  VERIFY_SSL = os.getenv("SERVICENOW_SSL_VERIFY", "true").lower() in ("1", "true", "yes")
25
  GEMINI_SSL_VERIFY = os.getenv("GEMINI_SSL_VERIFY", "true").lower() in ("1", "true", "yes")
26
 
27
+
28
  def safe_str(e: Any) -> str:
29
  try:
30
  return builtins.str(e)
 
39
  try:
40
  folder_path = os.path.join(os.getcwd(), "documents")
41
  if collection.count() == 0:
42
+ print("[KB] empty. Running ingestion...")
43
  ingest_documents(folder_path)
44
  else:
45
+ print(f"[KB] already populated with {collection.count()} entries. Skipping ingestion.")
46
  except Exception as e:
47
+ print(f"[KB] ingestion failed: {safe_str(e)}")
48
  yield
49
 
50
  app = FastAPI(lifespan=lifespan)
 
91
  f"gemini-2.5-flash-lite:generateContent?key={GEMINI_API_KEY}"
92
  )
93
 
94
+ # ---------------- Helper: consistently format numbered steps ----------------
95
+ def _format_steps_markdown(lines: List[str]) -> str:
96
+ items: List[str] = []
97
+ for i, ln in enumerate(lines, start=1):
98
+ s = (ln or "").strip()
99
+ if not s:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  continue
101
+ s = re.sub(r"^\s*(?:\d+[\.\)]\s+|[•\-]\s+)", "", s).strip()
102
+ items.append(f"{i}. {s}")
103
+ return "
104
+ ".join(items).strip()
105
+
106
def _as_numbered_steps(text: str) -> str:
    """Turn free-form step text into a Markdown numbered list.

    The text is split into non-blank lines. If only one line results, it is
    split again at each ". " that is followed by an uppercase letter or a
    digit. A line consisting solely of digits is then glued to the line after
    it as "<digits>. <next line>", and the result is handed to
    ``_format_steps_markdown`` for final rendering.
    """
    step_lines: List[str] = []
    for piece in (text or "").splitlines():
        piece = piece.strip()
        if piece:
            step_lines.append(piece)

    if len(step_lines) == 1:
        fragments = [
            frag.strip()
            for frag in re.split(r"\.\s+(?=[A-Z0-9])", step_lines[0])
            if frag.strip()
        ]
        if len(fragments) > 1:
            step_lines = fragments

    joined: List[str] = []
    last_index = len(step_lines) - 1
    skip_next = False
    for pos, current in enumerate(step_lines):
        if skip_next:
            # This line was already appended to the preceding number-only line.
            skip_next = False
            continue
        if pos < last_index and re.fullmatch(r"\d+", current):
            follower = step_lines[pos + 1].strip()
            joined.append(f"{current}. {follower}")
            skip_next = True
        else:
            joined.append(current)
    return _format_steps_markdown(joined)
124
 
125
+ # ---------------- Clarifying message (with ticket option) ----------------
126
  def _build_clarifying_message() -> str:
127
  return (
128
+ "I couldn’t find matching content in the KB yet. To help me narrow it down, please share:
129
+
130
+ "
131
+ "• Module/area (e.g., Picking, Receiving, Trailer Close)
132
+ "
133
+ "• Exact error message text/code (copy-paste)
134
+ "
135
+ "• IDs involved (Order#, Load ID, Shipment#)
136
+ "
137
+ "• Warehouse/site & environment (prod/test)
138
+ "
139
+ "• When it started and how many users are impacted
140
+
141
+ "
142
+ "You can also say ‘create ticket’ and I’ll raise a ServiceNow incident now."
143
  )
144
 
145
+ # ---------------- Resolution/Incident helpers ----------------
146
  def _build_tracking_descriptions(issue_text: str, resolved_text: str) -> Tuple[str, str]:
147
  issue = (issue_text or "").strip()
148
  resolved = (resolved_text or "").strip()
 
163
  ]
164
  return any(p in msg_norm for p in intent_phrases)
165
 
 
 
 
 
 
 
 
 
166
  def _parse_ticket_status_intent(msg_norm: str) -> Dict[str, Optional[str]]:
167
  status_keywords = ["status", "ticket status", "incident status", "check status", "check ticket status", "check incident status"]
168
  if not any(k in msg_norm for k in status_keywords):
 
195
  if not GEMINI_API_KEY:
196
  return False
197
  prompt = (
198
+ "Classify if the following user message indicates that the issue is resolved or working now.
199
+ "
200
+ "Return only 'true' or 'false'.
201
+
202
+ "
203
  f"Message: {user_message}"
204
  )
205
  headers = {"Content-Type": "application/json"}
 
217
  except Exception:
218
  return False
219
 
220
+ # ---------------- Query-normalized, order-preserving filter ----------------
 
 
 
 
 
 
 
 
 
221
  STRICT_OVERLAP = 3
222
  MAX_SENTENCES_STRICT = 4
223
  MAX_SENTENCES_CONCISE = 3
 
229
  return t
230
 
231
  def _split_sentences(ctx: str) -> List[str]:
232
+ raw_sents = re.split(r"(?<=[.!?])\s+|
233
+ +|•\s*|\-\s*", ctx or "")
234
  return [s.strip() for s in raw_sents if s and len(s.strip()) > 2]
235
 
236
  def _filter_context_for_query(context: str, query: str) -> Tuple[str, Dict[str, Any]]:
 
253
  matched_any.append(s)
254
  if matched_exact:
255
  kept = matched_exact[:MAX_SENTENCES_STRICT]
256
+ return "
257
+ ".join(kept).strip(), {'mode': 'exact', 'matched_count': len(kept), 'all_sentences': len(sentences)}
258
  if matched_any:
259
  kept = matched_any[:MAX_SENTENCES_CONCISE]
260
+ return "
261
+ ".join(kept).strip(), {'mode': 'concise', 'matched_count': len(kept), 'all_sentences': len(sentences)}
262
  kept = sentences[:MAX_SENTENCES_CONCISE]
263
+ return "
264
+ ".join(kept).strip(), {'mode': 'concise', 'matched_count': 0, 'all_sentences': len(sentences)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
 
266
+ # ---------------- Navigation extraction ----------------
267
+ NAV_LINE_REGEX = re.compile(r"(navigate\s+to|login|log in|menu|screen)", re.IGNORECASE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
 
269
  def _extract_navigation_only(text: str, max_lines: int = 6) -> str:
270
  lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
271
+ kept: List[str] = []
272
  for ln in lines:
273
+ if NAV_LINE_REGEX.search(ln):
274
  kept.append(ln)
275
+ if len(kept) >= max_lines:
276
+ break
277
+ return "
278
+ ".join(kept).strip() if kept else (text or "").strip()
279
+
280
+ # ---------------- Errors extraction (tightened for auth/role/access) ----------------
281
+ ERROR_STARTS = (
282
+ "error", "resolution", "fix", "verify", "check",
283
+ "permission", "access", "authorization", "authorisation",
284
+ "role", "role mapping", "security profile", "escalation"
285
+ )
286
 
287
+ def _extract_errors_only(text: str, max_lines: int = 12) -> str:
288
  lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
289
+ kept: List[str] = []
290
  for ln in lines:
291
+ low = ln.lower()
292
+ if low.startswith(ERROR_STARTS) or any(key in low for key in ERROR_STARTS):
293
  kept.append(ln)
294
+ if len(kept) >= max_lines:
295
+ break
296
+ return "
297
+ ".join(kept).strip() if kept else (text or "").strip()
298
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
@app.get("/")
async def health_check():
    """Handle GET "/" by returning a constant status payload."""
    payload = {"status": "ok"}
    return payload
 
316
  }
317
  if msg_norm in ("no", "no thanks", "nope"):
318
  return {
319
+ "bot_response": "Glad I could help! If you need anything else later, just let me know.",
320
  "status": "OK",
321
  "end_chat": True,
322
  "followup": None,
 
324
  "debug": {"intent": "end_conversation"},
325
  }
326
 
327
+ # --- Resolution ack (auto incident + mark Resolved) ---
328
  is_llm_resolved = _classify_resolution_llm(input_data.user_message)
329
  if _has_negation_resolved(msg_norm):
330
  is_llm_resolved = False
 
381
  if _is_incident_intent(msg_norm):
382
  return {
383
  "bot_response": (
384
+ "Okay, let’s create a ServiceNow incident.
385
+
386
+ "
387
+ "Please provide:
388
+ • Short Description (one line)
389
+ "
390
  "• Detailed Description (steps, error text, IDs, site, environment)"
391
  ),
392
  "status": (input_data.prev_status or "PARTIAL"),
 
401
  }
402
 
403
  # --- Generic opener ---
404
+ if len(msg_norm.split()) <= 2 or any(p in msg_norm for p in ("issue", "problem", "help", "support")):
405
  return {
406
+ "bot_response": _build_clarifying_message(),
 
 
 
 
 
 
 
407
  "status": "NO_KB_MATCH",
408
  "context_found": False,
409
  "ask_resolved": False,
410
+ "suggest_incident": True, # offer ticket immediately
411
+ "followup": "Reply with the above details or say ‘create ticket’.",
412
  "top_hits": [],
413
  "sources": [],
414
  "debug": {"intent": "generic_issue"},
 
448
  num = result.get("number", number or "unknown")
449
  return {
450
  "bot_response": (
451
+ f"**Ticket:** {num}
452
+ "
453
+ f"**Status:** {state_label}
454
+ "
455
  f"**Issue description:** {short}"
456
  ),
457
  "status": "OK",
 
469
 
470
  # --- Hybrid KB search ---
471
  kb_results = hybrid_search_knowledge_base(input_data.user_message, top_k=10, alpha=0.6, beta=0.4)
472
+ documents = kb_results.get("documents", [])
473
+ metadatas = kb_results.get("metadatas", [])
474
+ distances = kb_results.get("distances", [])
475
+ combined = kb_results.get("combined_scores", [])
476
+
477
+ items: List[Dict[str, Any]] = []
478
+ for i, doc in enumerate(documents):
479
+ text = doc.strip() if isinstance(doc, str) else ""
480
+ if not text:
481
+ continue
482
+ meta = metadatas[i] if i < len(metadatas) and isinstance(metadatas[i], dict) else {}
483
+ score = distances[i] if i < len(distances) else None
484
+ comb = combined[i] if i < len(combined) else None
485
+ m = dict(meta)
486
+ if score is not None:
487
+ m["distance"] = score
488
+ if comb is not None:
489
+ m["combined"] = comb
490
+ items.append({"text": text, "meta": m})
491
+ selected = items[:max(1, 2)]
492
+ context_raw = "
493
+
494
+ ---
495
+
496
+ ".join([s["text"] for s in selected]) if selected else ""
497
  filtered_text, filt_info = _filter_context_for_query(context_raw, input_data.user_message)
498
  context = filtered_text
499
+ context_found = bool(context.strip())
500
+ best_distance = min([d for d in distances if d is not None], default=None) if distances else None
501
+ best_combined = max([c for c in combined if c is not None], default=None) if combined else None
502
  detected_intent = kb_results.get("user_intent", "neutral")
503
  actions = kb_results.get("actions", [])
504
  best_doc = kb_results.get("best_doc")
505
+ top_meta = (metadatas or [{}])[0] if metadatas else {}
506
 
 
507
  if detected_intent == "steps" and best_doc:
 
508
  full_steps = get_best_steps_section_text(best_doc)
509
  if not full_steps:
 
510
  sec = (top_meta or {}).get("section")
511
  if sec:
512
  full_steps = get_section_text(best_doc, sec)
513
  if full_steps:
514
+ context = _as_numbered_steps(full_steps)
 
 
515
 
516
+ # Intent shaping
517
  q = (input_data.user_message or "").lower()
518
+ if detected_intent == "errors" or any(k in q for k in [
519
+ "error", "issue", "fail", "not working", "resolution", "fix",
520
+ "permission", "access", "authorization", "escalation", "role", "security profile"
521
+ ]):
522
+ context = _extract_errors_only(context, max_lines=12)
523
  elif any(k in q for k in ["navigate", "navigation", "menu", "screen"]):
524
  context = _extract_navigation_only(context, max_lines=6)
525
+ elif detected_intent == "steps":
526
+ context = _as_numbered_steps(context)
527
 
528
  # Gating
529
  short_query = len((input_data.user_message or "").split()) <= 4
530
  gate_combined_no_kb = 0.22 if short_query else 0.28
531
  gate_combined_ok = 0.60 if short_query else 0.55
532
  gate_distance_no_kb = 2.0
 
533
  if (not context_found or not context.strip()) or (
534
  (best_combined is None or best_combined < gate_combined_no_kb)
535
  and (best_distance is None or best_distance >= gate_distance_no_kb)
 
545
  "status": "NO_KB_MATCH",
546
  "context_found": False,
547
  "ask_resolved": False,
548
+ "suggest_incident": True,
549
+ "followup": ("Reply with the above details or say ‘create ticket’." if not second_try else "Shall I create a ticket now?"),
550
  "top_hits": [],
551
  "sources": [],
552
  "debug": {"used_chunks": 0, "second_try": second_try, "best_distance": best_distance, "best_combined": best_combined},
553
  }
554
 
555
+ # LLM rewrite (constrained to provided context)
556
  enhanced_prompt = (
557
+ "From the provided context, output only the actionable content relevant to the user's question. "
558
+ "Use ONLY the provided context; do NOT add information that is not present.
559
+
560
+ "
561
+ f"### Context
562
+ {context}
563
+
564
+ "
565
+ f"### Question
566
+ {input_data.user_message}
567
+
568
+ "
569
+ "### Output
570
+ "
571
+ "- Return numbered/bulleted steps in the same order when appropriate.
572
+ "
573
+ "- If context is insufficient, add: 'This may be partial based on available KB.'
574
+ "
575
  )
576
  headers = {"Content-Type": "application/json"}
577
  payload = {"contents": [{"parts": [{"text": enhanced_prompt}]}]}
578
  try:
579
+ resp = requests.post(GEMINI_URL, headers=headers, json=payload, timeout=25, verify=GEMINI_SSL_VERIFY)
580
  try:
581
  result = resp.json()
582
  except Exception:
 
584
  except Exception:
585
  resp = type("RespStub", (), {"status_code": 0})()
586
  result = {}
 
587
  try:
588
+ bot_text = result["candidates"][0]["content"]["parts"][0]["text"] if isinstance(result, dict) else ""
589
  except Exception:
590
  bot_text = ""
 
591
  if not bot_text.strip():
592
  bot_text = context
593
+ bot_text = "
594
+ ".join([ln for ln in bot_text.splitlines() if not re.match(r"^\s*source\s*:", ln, flags=re.IGNORECASE)]).strip()
595
+
596
+ if detected_intent == "steps":
597
+ bot_text = _as_numbered_steps(bot_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
598
 
599
  status = "OK" if (
600
  (best_combined is not None and best_combined >= gate_combined_ok)
 
610
  "status": status,
611
  "context_found": True,
612
  "ask_resolved": (status == "OK"),
613
+ "suggest_incident": (status == "PARTIAL"),
614
+ "followup": ("Is this helpful or should I raise a ticket?" if status == "PARTIAL" else None),
615
  "top_hits": [],
616
  "sources": [],
617
  "debug": {
618
+ "used_chunks": len(context.split("
619
+
620
+ ---
621
+
622
+ ")) if context else 0,
623
  "best_distance": best_distance,
624
  "best_combined": best_combined,
625
  "http_status": getattr(resp, "status_code", 0),
 
636
  except Exception as e:
637
  raise HTTPException(status_code=500, detail=safe_str(e))
638
 
639
+
640
  def _set_incident_resolved(sys_id: str) -> bool:
641
  try:
642
  token = get_valid_token()
 
650
  "Content-Type": "application/json",
651
  }
652
  url = f"{instance_url}/api/now/table/incident/{sys_id}"
 
653
  close_code_val = os.getenv("SERVICENOW_CLOSE_CODE", "Solution provided")
654
  close_notes_val = os.getenv("SERVICENOW_RESOLUTION_NOTES", "Issue resolved, user confirmed")
655
  caller_sysid = os.getenv("SERVICENOW_CALLER_SYSID")
656
  resolved_by_sysid = os.getenv("SERVICENOW_RESOLVED_BY_SYSID")
657
  assign_group = os.getenv("SERVICENOW_ASSIGNMENT_GROUP_SYSID")
658
  require_progress = os.getenv("SERVICENOW_REQUIRE_IN_PROGRESS_FIRST", "false").lower() in ("1", "true", "yes")
 
659
  if require_progress:
660
  try:
661
  resp1 = requests.patch(url, headers=headers, json={"state": "2"}, verify=VERIFY_SSL, timeout=25)
 
728
  if bool(input_data.mark_resolved) and sys_id not in ("<unknown>", None):
729
  ok = _set_incident_resolved(sys_id)
730
  resolved_note = " (marked Resolved)" if ok else " (could not mark Resolved; please update manually)"
731
+ ticket_text = f"Incident created: {inc_number}{resolved_note}" if inc_number else "Incident created."
732
+ return {
733
+ "bot_response": f"✅ {ticket_text}",
734
+ "debug": "Incident created via ServiceNow",
735
+ "persist": True,
736
+ "show_assist_card": True,
737
+ "followup": "Is there anything else I can assist you with?",
738
+ }
739
  else:
740
+ raise HTTPException(status_code=500, detail=(result or {}).get("error", "Unknown error"))
 
 
 
 
 
 
 
741
  except Exception as e:
742
  raise HTTPException(status_code=500, detail=safe_str(e))
743
 
 
745
  async def generate_ticket_desc_ep(input_data: TicketDescInput):
746
  try:
747
  prompt = (
748
+ f"You are helping generate ServiceNow ticket descriptions based on the issue: {input_data.issue}.
749
+ "
750
+ "Please return the output strictly in JSON format with the following keys:
751
+ "
752
+ "{
753
+ "
754
+ ' "ShortDescription": "A concise summary of the issue (max 100 characters)",
755
+ '
756
+ ' "DetailedDescription": "A detailed explanation of the issue"
757
+ '
758
+ "}
759
+ "
760
  "Do not include any extra text, comments, or explanations outside the JSON."
761
  )
762
  headers = {"Content-Type": "application/json"}
763
  payload = {"contents": [{"parts": [{"text": prompt}]}]}
764
+ resp = requests.post(GEMINI_URL, headers=headers, json=payload, timeout=25, verify=GEMINI_SSL_VERIFY)
765
  try:
766
  data = resp.json()
767
  except Exception:
 
772
  return {"ShortDescription": "", "DetailedDescription": "", "error": "Gemini parsing failed"}
773
  if text.startswith("```"):
774
  lines = [ln for ln in text.splitlines() if not ln.strip().startswith("```")]
775
+ text = "
776
+ ".join(lines).strip()
777
  try:
778
  ticket_json = json.loads(text)
779
  return {
 
812
  number = result.get("number", input_data.number or "unknown")
813
  return {
814
  "bot_response": (
815
+ f"**Ticket:** {number}
816
+ "
817
+ f"**Status:** {state_label}
818
+ "
819
  f"**Issue description:** {short}"
820
+ ).replace("
821
+ ", "
822
+ "),
823
  "followup": "Is there anything else I can assist you with?",
824
  "show_assist_card": True,
825
  "persist": True,
 
827
  }
828
  except Exception as e:
829
  raise HTTPException(status_code=500, detail=safe_str(e))