srilakshu012456 committed on
Commit
5964557
·
verified ·
1 Parent(s): 2ef7e03

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +177 -500
main.py CHANGED
@@ -1,4 +1,5 @@
1
 
 
2
  import os
3
  import json
4
  import re
@@ -12,8 +13,9 @@ from fastapi import FastAPI, HTTPException
12
  from fastapi.middleware.cors import CORSMiddleware
13
  from pydantic import BaseModel
14
  from dotenv import load_dotenv
 
15
 
16
- # Import KB services
17
  from services.kb_creation import (
18
  collection,
19
  ingest_documents,
@@ -28,10 +30,9 @@ from services.kb_creation import (
28
  from services.login import router as login_router
29
  from services.generate_ticket import get_valid_token, create_incident
30
 
31
-
32
- # ------------------------------------------------------------------------------
33
  # Environment
34
- # ------------------------------------------------------------------------------
35
  load_dotenv()
36
  VERIFY_SSL = os.getenv("SERVICENOW_SSL_VERIFY", "true").lower() in ("1", "true", "yes")
37
  GEMINI_SSL_VERIFY = os.getenv("GEMINI_SSL_VERIFY", "true").lower() in ("1", "true", "yes")
@@ -42,17 +43,15 @@ GEMINI_URL = (
42
  )
43
  os.environ["POSTHOG_DISABLED"] = "true"
44
 
45
-
46
def safe_str(e: Any) -> str:
    """Return ``str(e)``, or a fixed placeholder when stringifying itself raises.

    Intended for logging/reporting paths where a failing ``__str__`` must not
    mask the error that is being reported.
    """
    try:
        rendered = builtins.str(e)
    except Exception:
        # A broken __str__ must never propagate out of an error-reporting path.
        rendered = "<error stringify failed>"
    return rendered
51
 
52
-
53
- # ------------------------------------------------------------------------------
54
  # App / Lifespan
55
- # ------------------------------------------------------------------------------
56
  @asynccontextmanager
57
  async def lifespan(app: FastAPI):
58
  try:
@@ -66,14 +65,13 @@ async def lifespan(app: FastAPI):
66
  print(f"[KB] ingestion failed: {safe_str(e)}")
67
  yield
68
 
69
-
70
  app = FastAPI(lifespan=lifespan)
71
  app.include_router(login_router)
72
 
73
- # Allow your HF Space frontend; add localhost for local dev if needed
74
  origins = [
75
  "https://chatbotnova-chatbot-frontend.hf.space",
76
- # "http://localhost:5173", # uncomment for local dev (Vite)
77
  ]
78
  app.add_middleware(
79
  CORSMiddleware,
@@ -83,31 +81,26 @@ app.add_middleware(
83
  allow_headers=["*"],
84
  )
85
 
86
-
87
- # ------------------------------------------------------------------------------
88
  # Models
89
- # ------------------------------------------------------------------------------
90
class ChatInput(BaseModel):
    """Request body for the ``/chat`` endpoint."""

    # Raw text typed by the user; the chat handler lowercases/strips it itself.
    user_message: str
    # Status carried over from the previous turn; echoed back on some replies.
    prev_status: Optional[str] = None
    # Previously reported issue text; used when building tracking descriptions.
    last_issue: Optional[str] = None
94
 
95
-
96
class IncidentInput(BaseModel):
    """Payload describing an incident to be created."""

    # One-line summary of the incident.
    short_description: str
    # Full free-text description of the incident.
    description: str
    # Optional flag, False by default.
    # NOTE(review): presumably asks for the incident to be resolved right after
    # creation; the consumer is not visible here, confirm.
    mark_resolved: Optional[bool] = False
100
 
101
-
102
class TicketDescInput(BaseModel):
    """Payload carrying a single free-text issue statement."""

    # The issue text as provided by the client.
    issue: str
104
 
105
-
106
class TicketStatusInput(BaseModel):
    """Payload identifying a ticket by ``sys_id`` and/or ``number``; both optional."""

    # Record identifier of the ticket, when known.
    sys_id: Optional[str] = None
    # Ticket number, when known.
    # NOTE(review): presumably of the "INC..." form parsed elsewhere in this file; confirm.
    number: Optional[str] = None
109
 
110
-
111
  STATE_MAP = {
112
  "1": "New",
113
  "2": "In Progress",
@@ -117,19 +110,16 @@ STATE_MAP = {
117
  "8": "Canceled",
118
  }
119
 
120
-
121
- # ------------------------------------------------------------------------------
122
- # Generic helpers
123
- # ------------------------------------------------------------------------------
124
  NUMBERING_STYLE = os.getenv("NUMBERING_STYLE", "digit").lower() # 'digit' or 'step'
125
-
126
  DOMAIN_STATUS_TERMS = (
127
  "shipment", "order", "load", "trailer", "wave",
128
  "inventory", "putaway", "receiving", "appointment",
129
  "dock", "door", "manifest", "pallet", "container",
130
  "asn", "grn", "pick", "picking"
131
  )
132
-
133
  ERROR_FAMILY_SYNS = {
134
  "NOT_FOUND": (
135
  "not found", "missing", "does not exist", "doesn't exist",
@@ -159,7 +149,6 @@ ERROR_FAMILY_SYNS = {
159
  ),
160
  }
161
 
162
-
163
  def _detect_error_families(msg: str) -> list:
164
  low = (msg or "").lower()
165
  low_norm = re.sub(r"[^\w\s]", " ", low)
@@ -170,13 +159,11 @@ def _detect_error_families(msg: str) -> list:
170
  fams.append(fam)
171
  return fams
172
 
173
-
174
def _is_domain_status_context(msg_norm: str) -> bool:
    """Tell whether a message talks about a domain object's status.

    True when the message contains "status locked" / "locked status", or any
    term from ``DOMAIN_STATUS_TERMS`` (plain substring checks on ``msg_norm``).
    """
    for phrase in ("status locked", "locked status"):
        if phrase in msg_norm:
            return True
    for term in DOMAIN_STATUS_TERMS:
        if term in msg_norm:
            return True
    return False
178
 
179
-
180
  def _normalize_lines(text: str) -> List[str]:
181
  raw = (text or "")
182
  try:
@@ -184,23 +171,14 @@ def _normalize_lines(text: str) -> List[str]:
184
  except Exception:
185
  return [raw.strip()] if raw.strip() else []
186
 
187
-
188
- # --- NEW: action-aware filter for numbered steps (creation/update/delete) ---
189
- def _filter_numbered_steps_by_actions(numbered_text: str,
190
- wanted: set[str],
191
- exclude: set[str]) -> str:
192
- """
193
- Keep only lines that match the 'wanted' actions and drop lines that match 'exclude'.
194
- Works on already-numbered/bulleted text (one step per line).
195
- """
196
- # Keep synonyms aligned with kb_creation.ACTION_SYNONYMS
197
  ACTION_SYNONYMS = {
198
  "create": ("create", "creation", "add", "new", "generate"),
199
  "update": ("update", "modify", "change", "edit"),
200
  "delete": ("delete", "remove"),
201
  "navigate": ("navigate", "go to", "open"),
202
  }
203
-
204
  def _has_any(line: str, keys: set[str]) -> bool:
205
  low = (line or "").lower()
206
  for k in keys:
@@ -211,142 +189,42 @@ def _filter_numbered_steps_by_actions(numbered_text: str,
211
 
212
  out_lines = []
213
  for ln in (numbered_text or "").splitlines():
214
- # If an exclude action appears in the line, drop it
215
  if _has_any(ln, exclude):
216
  continue
217
- # If caller asked for specific actions, keep only those
218
  if wanted:
219
  if _has_any(ln, wanted):
220
  out_lines.append(ln)
221
  else:
222
- # no explicit wanted actions → keep as-is
223
  out_lines.append(ln)
224
-
225
- # If over-filtering made it empty, fall back to original text
226
  return "\n".join(out_lines).strip() or (numbered_text or "").strip()
227
 
228
- # --- NEW: extract the anchor clause from user message ---
229
def _extract_anchor_from_query(msg: str) -> dict:
    """
    Split a user message into the clause that should be matched against SOP steps.

    Returns a dict with:
      - 'anchor': the clause to compare with SOP step text.
      - 'has_followup': True when the message contains a generic follow-up cue
        ("what next", "what to do", "then", "after that", ...).

    Cues are matched as whole words/phrases of the normalized message. A plain
    substring test would treat e.g. "authentication" as containing the cue
    "then" and wrongly flag the message as a follow-up.
    """
    raw = (msg or "").strip()

    # Very small, domain-free set of follow-up cues.
    FOLLOWUP_CUES = ("what next", "what is next", "what to do", "then", "after that", "next")
    cue_re = re.compile(
        r"\b(?:" + "|".join(re.escape(cue) for cue in FOLLOWUP_CUES) + r")\b"
    )

    has_followup = bool(cue_re.search(_norm_text(raw)))

    # Split on common separators to isolate candidate clauses.
    parts = [p.strip() for p in re.split(r"[?.,;:\-\n]+", raw) if p.strip()]
    if not parts:
        return {"anchor": raw, "has_followup": has_followup}

    # "... , then what next": when the trailing clause is itself a follow-up cue,
    # the clause before it is the anchor; otherwise the last clause is.
    if len(parts) >= 2 and cue_re.search(_norm_text(parts[-1])):
        anchor = parts[-2]
    else:
        anchor = parts[-1]

    return {"anchor": anchor.strip(), "has_followup": has_followup}
258
-
259
- # --- Core: keyword-free, anchor-based "next steps" ---
260
def _anchor_next_steps(user_message: str, numbered_text: str, max_next: int = 8) -> list | None:
    """
    Return the SOP steps that come after the step best matching the user's anchor.

    The anchor clause is taken from ``_extract_anchor_from_query``. Each step is
    scored with ``_similarity`` on the whole line and on each of its sentences;
    a punctuation-insensitive containment between anchor and sentence counts as
    a literal hit.

    Args:
        user_message: raw user text.
        numbered_text: SOP text, one step per line.
        max_next: maximum number of following steps to return.

    Returns:
        None when there are no steps, no anchor, or no sufficiently strong match;
        [] when the matched step is the last one; otherwise the following steps,
        passed through ``_dedupe_lines`` with blank lines dropped.

    NOTE(review): ``_similarity``, ``_split_sentences`` and ``_dedupe_lines`` are
    module-level helpers not shown alongside this function; their exact
    behavior is assumed from how they are used here.
    """
    steps = _split_sop_into_steps(numbered_text)
    if not steps:
        return None

    info = _extract_anchor_from_query(user_message)
    anchor = info.get("anchor", "").strip()
    if not anchor:
        return None
    anchor_norm = _norm_text(anchor)
    has_followup = bool(info.get("has_followup"))

    # Loop-invariant: anchor with all non-word characters removed.
    a_flat = re.sub(r"\W+", "", anchor_norm)

    candidates = []
    for idx, step_line in enumerate(steps):
        scores = [_similarity(anchor, step_line)]
        literal_hit = False
        # Score each sentence as well so an anchor can match mid-step text.
        for sent in _split_sentences(step_line):
            scores.append(_similarity(anchor, sent))
            s_flat = re.sub(r"\W+", "", _norm_text(sent))
            # Both sides must be non-empty: "" is a substring of every string and
            # would otherwise register a bogus literal hit that skips the threshold.
            if a_flat and s_flat and (a_flat in s_flat or s_flat in a_flat):
                literal_hit = True
        candidates.append((idx, max(scores), literal_hit))

    # Highest score wins; on ties prefer the later step (progress assumption).
    best_idx, best_score, best_literal = max(candidates, key=lambda c: (c[1], c[0]))

    # Dynamic threshold: a literal hit is accepted outright; otherwise require
    # 0.50 with a follow-up cue / 0.55 without, or 0.40 for anchors of >= 3 tokens.
    if not best_literal:
        tok_count = len([t for t in anchor_norm.split() if len(t) > 1])
        base_ok = best_score >= (0.50 if has_followup else 0.55)
        len_ok = best_score >= 0.40 and tok_count >= 3
        if not (base_ok or len_ok):
            return None

    # Start from the step AFTER the matched one.
    start = best_idx + 1
    if start >= len(steps):
        return []  # already at the final step
    next_steps = steps[start:min(start + max_next, len(steps))]

    # Dedupe in case adjacent chunks repeat lines; the caller renumbers.
    return [ln for ln in _dedupe_lines("\n".join(next_steps)).splitlines() if ln.strip()]
328
-
329
  def _ensure_numbering(text: str) -> str:
330
- """
331
- Normalize raw SOP steps into a clean numbered list using circled digits.
332
- Robust against '1.', '1)', 'Step 1:', bullets ('-', '*', '•'), and circled digits.
333
- """
334
  text = re.sub(r"[\u2060\u200B]", "", text or "")
335
  lines = [ln.strip() for ln in (text or "").splitlines() if ln and ln.strip()]
336
  if not lines:
337
  return text or ""
338
-
339
  para = " ".join(lines).strip()
340
  if not para:
341
  return ""
342
-
343
- # Hard breaks at step boundaries
344
- para_clean = re.sub(r"(?:\b\d+\s*[.\)])\s+", "\n\n\n", para) # 1. / 1)
345
- para_clean = re.sub(r"(?:[\u2460-\u2473]\s+)", "\n\n\n", para_clean) # circled digits
346
- para_clean = re.sub(r"(?i)\bstep\s*\d+\s*:\s*", "\n\n\n", para_clean) # Step 1:
347
-
348
  segments = [seg.strip() for seg in para_clean.split("\n\n\n") if seg.strip()]
349
-
350
  if len(segments) < 2:
351
  tmp = [ln.strip() for ln in para.splitlines() if ln.strip()]
352
  segments = tmp if len(tmp) > 1 else [seg.strip() for seg in re.split(r"(?<=[.!?])\s+", para) if seg.strip()]
@@ -354,51 +232,36 @@ def _ensure_numbering(text: str) -> str:
354
  def strip_prefix_any(s: str) -> str:
355
  return re.sub(
356
  r"^\s*(?:"
357
- r"(?:\d+\s*[.\)])" # leading numbers 1., 2)
358
- r"|(?:step\s*\d+:?)" # Step 1:
359
- r"|(?:[-*\u2022])" # bullets
360
- r"|(?:[\u2460-\u2473])" # circled digits
361
- r")\s*",
362
- "",
363
- (s or "").strip(),
364
- flags=re.IGNORECASE
365
  )
366
-
367
  clean_segments = [strip_prefix_any(seg) for seg in segments if seg.strip()]
368
-
369
  circled = {
370
  1: "\u2460", 2: "\u2461", 3: "\u2462", 4: "\u2463", 5: "\u2464",
371
  6: "\u2465", 7: "\u2466", 8: "\u2467", 9: "\u2468", 10: "\u2469",
372
  11: "\u246a", 12: "\u246b", 13: "\u246c", 14: "\u246d", 15: "\u246e",
373
  16: "\u246f", 17: "\u2470", 18: "\u2471", 19: "\u2472", 20: "\u2473"
374
  }
375
-
376
  out = []
377
  for idx, seg in enumerate(clean_segments, start=1):
378
  marker = circled.get(idx, f"{idx})")
379
  out.append(f"{marker} {seg}")
380
-
381
  return "\n".join(out)
382
 
383
  def _norm_text(s: str) -> str:
384
- """
385
- Normalize text: lower, remove punctuation, light stemming for common verb endings
386
- and plural 's'. (scan/scanned/scanning -> scan; confirm/confirming -> confirm)
387
- """
388
  s = (s or "").lower()
389
  s = re.sub(r"[^\w\s]", " ", s)
390
  s = re.sub(r"\s+", " ", s).strip()
391
  if not s:
392
  return s
393
-
394
- # crude token-level stemming (safe for English verbs/nouns seen in SOPs)
395
  toks = s.split()
396
  stemmed = []
397
  for t in toks:
398
- # strip plural 's' unless token is very short
399
  if len(t) > 3 and t.endswith("s"):
400
  t = t[:-1]
401
- # past tense/gerund
402
  if len(t) > 4 and t.endswith("ed"):
403
  t = t[:-2]
404
  if len(t) > 5 and t.endswith("ing"):
@@ -406,17 +269,12 @@ def _norm_text(s: str) -> str:
406
  stemmed.append(t)
407
  return " ".join(stemmed).strip()
408
 
409
-
410
  def _split_sop_into_steps(numbered_text: str) -> list:
411
- """
412
- Split a numbered/bulleted SOP block (already passed through _ensure_numbering)
413
- into atomic steps. Returns a list of raw step strings (order preserved).
414
- """
415
  lines = [ln.strip() for ln in (numbered_text or "").splitlines() if ln.strip()]
416
  steps = []
417
  for ln in lines:
418
  cleaned = re.sub(
419
- r"^\s*(?:[\u2460-\u2473]|\d+[.\)]|[-*•])\s*",
420
  "",
421
  ln
422
  )
@@ -424,186 +282,84 @@ def _split_sop_into_steps(numbered_text: str) -> list:
424
  steps.append(cleaned)
425
  return steps
426
 
427
def _soft_match_score(a: str, b: str) -> float:
    """
    Fuzzy similarity of two strings in the range 0..1 (higher is more similar).

    Both inputs are normalized with ``_norm_text``; the score blends token
    Jaccard overlap (weight 0.65) with adjacent-token bigram overlap
    (weight 0.45), capped at 1.0. Returns 0.0 when either side has no tokens.
    """
    tokens_a = _norm_text(a).split()
    tokens_b = _norm_text(b).split()
    vocab_a, vocab_b = set(tokens_a), set(tokens_b)
    if not (vocab_a and vocab_b):
        return 0.0

    # Token-level Jaccard.
    union = len(vocab_a | vocab_b)
    jacc = len(vocab_a & vocab_b) / union if union else 0.0

    # Bigram overlap; a single-token input simply yields no bigrams.
    pairs_a = {" ".join(tokens_a[i:i + 2]) for i in range(len(tokens_a) - 1)}
    pairs_b = {" ".join(tokens_b[i:i + 2]) for i in range(len(tokens_b) - 1)}
    pair_union = len(pairs_a | pairs_b) if (pairs_a or pairs_b) else 1
    big = len(pairs_a & pairs_b) / pair_union

    # Weights slightly favor phrase (bigram) overlap; their sum exceeds 1, hence the cap.
    return min(1.0, 0.65 * jacc + 0.45 * big)
455
-
456
def _detect_next_intent(user_query: str) -> bool:
    """
    Return True when the query asks what comes next in a procedure.

    The query is normalized (and lightly stemmed) by ``_norm_text`` before
    matching. Because of that stemming, a literal key such as "proceed" (the
    query token becomes "proce") or "following step" ("follow step") can never
    be found as written. Each key is therefore checked twice:
      1. as a plain substring of the normalized query (original behavior), and
      2. in its own normalized form, as a whole-token phrase, so the stemmed
         key cannot fire inside unrelated words such as "process".
    """
    q = _norm_text(user_query)
    keys = (
        "after", "after this", "what next", "whats next", "next step",
        "then what", "following step", "continue", "subsequent", "proceed",
    )
    padded = f" {q} "
    for key in keys:
        if key in q:
            return True
        key_norm = _norm_text(key)
        if key_norm != key and f" {key_norm} " in padded:
            return True
    return False
463
-
464
-
465
-
466
def _resolve_next_steps(user_query: str, numbered_text: str, max_next: int = 8, min_score: float = 0.25):
    """
    Resolve "what next" queries to the steps following the best-matching step.

    Flow:
      1. Bail out (None) unless ``_detect_next_intent`` fires.
      2. Score every step: ``_soft_match_score`` plus 0.12 per ``_syn_hits`` hit.
      3. If the best score reaches ``min_score``, return up to ``max_next``
         steps after it ([] when it is the last step); otherwise None so the
         caller can fall back to rendering the full SOP.
    """
    if not _detect_next_intent(user_query):
        return None

    steps = _split_sop_into_steps(numbered_text)
    if not steps:
        return None

    q_tokens = [tok for tok in _norm_text(user_query).split() if len(tok) > 1]

    def _score(step: str) -> float:
        # Fuzzy score plus a discrete bonus for each synonym/phrase hit.
        return _soft_match_score(user_query, step) + 0.12 * _syn_hits(q_tokens, step)

    best_idx, best_score = -1, -1.0
    for position, value in enumerate(map(_score, steps)):
        if value > best_score:  # strict: the earliest step wins ties
            best_idx, best_score = position, value

    # Loose threshold so synonym/tense variations can still anchor.
    if best_idx < 0 or best_score < min_score:
        return None

    first = best_idx + 1
    if first >= len(steps):
        return []  # matched the final step; nothing follows
    return steps[first:min(first + max_next, len(steps))]
505
-
506
def _syn_hits(q_tokens: List[str], step_line: str) -> int:
    """
    Count synonym-family and anchor-phrase hits between query tokens and a step.

    A query token scores one hit per synonym family it belongs to (by family
    name or member word) when any word of that family occurs as a substring of
    the normalized step. Each fixed anchor phrase found in the normalized step
    adds one more hit, independent of the query.

    NOTE(review): the step is stemmed by ``_norm_text`` while the phrases below
    are not, so "place picked" (the step would read "place pick") looks
    unmatchable. Confirm whether the phrases should be normalized too.
    """
    step = _norm_text(step_line)

    families = {
        "scan": {"scan", "prompt", "display", "show"},
        "confirm": {"confirm", "verify", "check"},
        "item": {"item", "sku", "product"},
        "qty": {"qty", "quantity", "count"},
        "place": {"place", "put", "stow"},
        "complete": {"complete", "finish", "done"},
        "staging": {"staging", "stage", "dock", "door"},
        "location": {"location", "loc"},
        "pallet": {"pallet", "container"},
    }
    anchor_phrases = (
        "scan location", "confirm item", "pick quantity",
        "place picked", "move pallet", "staging area",
    )

    family_hits = sum(
        1
        for token in q_tokens
        for name, members in families.items()
        if (token == name or token in members) and any(word in step for word in members)
    )
    phrase_hits = sum(1 for phrase in anchor_phrases if phrase in step)
    return family_hits + phrase_hits
538
-
539
- def _format_steps_as_numbered(steps: list) -> str:
540
- """Render a small list of steps with circled numbers for visual continuity."""
541
- circled = {
542
- 1: "\u2460", 2: "\u2461", 3: "\u2462", 4: "\u2463", 5: "\u2464",
543
- 6: "\u2465", 7: "\u2466", 8: "\u2467", 9: "\u2468", 10: "\u2469",
544
- 11: "\u246a", 12: "\u246b", 13: "\u246c", 14: "\u246d", 15: "\u246e",
545
- 16: "\u246f", 17: "\u2470", 18: "\u2471", 19: "\u2472", 20: "\u2473"
546
- }
547
- out = []
548
- for i, s in enumerate(steps, start=1):
549
- out.append(f"{circled.get(i, str(i))} {s}")
550
- return "\n".join(out)
551
-
552
-
553
- # --- NEW: sentence-level action filter for mixed paragraphs ---
554
- def _filter_sentences_by_actions(numbered_text: str,
555
- wanted: set[str],
556
- exclude: set[str]) -> str:
557
- """
558
- Works on already-numbered text. Splits each line into sentences and
559
- keeps only sentences that match wanted actions; drops sentences matching exclude actions.
560
- Falls back gracefully if no wanted match is found.
561
- """
562
- ACTION_SYNONYMS = {
563
- "create": ("create", "creation", "add", "new", "generate", "setup", "set up", "register"),
564
- "update": ("update", "modify", "change", "edit", "amend"),
565
- "delete": ("delete", "remove", "cancel", "void"),
566
- "navigate": ("navigate", "go to", "open"),
567
- }
568
-
569
- def _has_any(text: str, keys: set[str]) -> bool:
570
- low = (text or "").lower()
571
- for k in keys:
572
- for syn in ACTION_SYNONYMS.get(k, (k,)):
573
- if syn in low:
574
- return True
575
- return False
576
-
577
- def _split_sentences(block: str) -> list:
578
- # robust split on ., !, ? followed by space
579
- import re
580
- parts = [s.strip() for s in re.split(r"(?<=[.!?])\s+", block or "") if s.strip()]
581
- return parts if parts else ([block.strip()] if block.strip() else [])
582
-
583
- out_lines = []
584
- for ln in (numbered_text or "").splitlines():
585
- sent_keep = []
586
- sentences = _split_sentences(ln)
587
- for s in sentences:
588
- if exclude and _has_any(s, exclude):
589
- continue
590
- if wanted:
591
- if _has_any(s, wanted):
592
- sent_keep.append(s)
593
- else:
594
- # no explicit wanted → keep unless excluded
595
- sent_keep.append(s)
596
-
597
- # If nothing matched wanted but line mentions wanted elsewhere, keep the whole line
598
- if wanted and not sent_keep:
599
- if _has_any(ln, wanted) and not _has_any(ln, exclude):
600
- sent_keep = sentences
601
-
602
- if sent_keep:
603
- out_lines.append(" ".join(sent_keep))
604
-
605
- return "\n".join(out_lines).strip() or (numbered_text or "").strip()
606
 
 
607
  def _filter_context_for_query(context: str, query: str) -> Tuple[str, Dict[str, Any]]:
608
  STRICT_OVERLAP = 3
609
  MAX_SENTENCES_STRICT = 4
@@ -615,20 +371,19 @@ def _filter_context_for_query(context: str, query: str) -> Tuple[str, Dict[str,
615
  t = re.sub(r"\s+", " ", t).strip()
616
  return t
617
 
618
- def _split_sentences(ctx: str) -> List[str]:
619
  raw_sents = re.split(r"(?<=[.!?])\s+", ctx or "")
620
  return [s.strip() for s in raw_sents if s and len(s.strip()) > 2]
621
 
622
  ctx = (context or "").strip()
623
  if not ctx or not query:
624
  return ctx, {'mode': 'concise', 'matched_count': 0, 'all_sentences': 0}
625
-
626
  q_norm = _norm(query)
627
  q_terms = [t for t in q_norm.split() if len(t) > 2]
628
  if not q_terms:
629
  return ctx, {'mode': 'concise', 'matched_count': 0, 'all_sentences': 0}
630
 
631
- sentences = _split_sentences(ctx)
632
  matched_exact, matched_any = [], []
633
  for s in sentences:
634
  s_norm = _norm(s)
@@ -641,14 +396,18 @@ def _filter_context_for_query(context: str, query: str) -> Tuple[str, Dict[str,
641
 
642
  if matched_exact:
643
  kept = matched_exact[:MAX_SENTENCES_STRICT]
644
- return "\n".join(kept).strip(), {'mode': 'exact', 'matched_count': len(kept), 'all_sentences': len(sentences)}
 
 
645
  if matched_any:
646
  kept = matched_any[:MAX_SENTENCES_CONCISE]
647
- return "\n".join(kept).strip(), {'mode': 'concise', 'matched_count': len(kept), 'all_sentences': len(sentences)}
648
-
 
649
  kept = sentences[:MAX_SENTENCES_CONCISE]
650
- return "\n".join(kept).strip(), {'mode': 'concise', 'matched_count': 0, 'all_sentences': len(sentences)}
651
-
 
652
 
653
  def _extract_errors_only(text: str, max_lines: int = 12) -> str:
654
  kept: List[str] = []
@@ -659,7 +418,6 @@ def _extract_errors_only(text: str, max_lines: int = 12) -> str:
659
  break
660
  return "\n".join(kept).strip() if kept else (text or "").strip()
661
 
662
-
663
  def _filter_permission_lines(text: str, max_lines: int = 6) -> str:
664
  PERM_SYNONYMS = (
665
  "permission", "permissions", "access", "authorization", "authorisation",
@@ -674,21 +432,18 @@ def _filter_permission_lines(text: str, max_lines: int = 6) -> str:
674
  break
675
  return "\n".join(kept).strip() if kept else (text or "").strip()
676
 
677
-
678
  def _extract_escalation_line(text: str) -> Optional[str]:
679
  if not text:
680
  return None
681
  lines = _normalize_lines(text)
682
  if not lines:
683
  return None
684
-
685
  start_idx = None
686
  for i, ln in enumerate(lines):
687
  low = ln.lower()
688
  if "escalation" in low or "escalation path" in low or "escalate" in low:
689
  start_idx = i
690
  break
691
-
692
  block: List[str] = []
693
  if start_idx is not None:
694
  for j in range(start_idx, min(len(lines), start_idx + 6)):
@@ -697,29 +452,24 @@ def _extract_escalation_line(text: str) -> Optional[str]:
697
  block.append(lines[j].strip())
698
  else:
699
  block = [ln.strip() for ln in lines if ("->" in ln or "→" in ln)]
700
-
701
  if not block:
702
  return None
703
-
704
  text_block = " ".join(block)
705
  m = re.search(r"escalation[^:]*:\s*(.+)", text_block, flags=re.IGNORECASE)
706
  path = m.group(1).strip() if m else None
707
-
708
  if not path:
709
  arrow_lines = [ln for ln in block if ("->" in ln or "→" in ln)]
710
  if arrow_lines:
711
  path = arrow_lines[0]
712
  if not path:
713
- m2 = re.search(r"(operator.*?administrator|operator.*)", text_block, flags=re.IGNORECASE)
714
  path = m2.group(1).strip() if m2 else None
715
  if not path:
716
  return None
717
-
718
  path = path.replace("->", "→").strip()
719
  path = re.sub(r"^(?i:escalation\s*path)\s*:\s*", "", path).strip()
720
  return f"If you want to escalate the issue, follow: {path}"
721
 
722
-
723
  def _detect_language_hint(msg: str) -> Optional[str]:
724
  if re.search(r"[\u0B80-\u0BFF]", msg or ""): # Tamil
725
  return "Tamil"
@@ -727,13 +477,9 @@ def _detect_language_hint(msg: str) -> Optional[str]:
727
  return "Hindi"
728
  return None
729
 
730
-
731
  def _build_clarifying_message() -> str:
732
- return (
733
- "It seems the issue isn’t resolved yet. Would you like to share a few details so I can check further, "
734
- "or should I raise a ServiceNow ticket for you?"
735
- )
736
-
737
 
738
  def _build_tracking_descriptions(issue_text: str, resolved_text: str) -> Tuple[str, str]:
739
  issue = (issue_text or "").strip()
@@ -746,7 +492,6 @@ def _build_tracking_descriptions(issue_text: str, resolved_text: str) -> Tuple[s
746
  ).strip()
747
  return short_desc, long_desc
748
 
749
-
750
  def _is_incident_intent(msg_norm: str) -> bool:
751
  intent_phrases = [
752
  "create ticket", "create a ticket", "raise ticket", "raise a ticket", "open ticket", "open a ticket",
@@ -756,7 +501,6 @@ def _is_incident_intent(msg_norm: str) -> bool:
756
  ]
757
  return any(p in msg_norm for p in intent_phrases)
758
 
759
-
760
  def _parse_ticket_status_intent(msg_norm: str) -> Dict[str, Optional[str]]:
761
  status_keywords = ["status", "ticket status", "incident status", "check status", "check ticket status", "check incident status"]
762
  base_has_status = any(k in msg_norm for k in status_keywords)
@@ -764,10 +508,8 @@ def _parse_ticket_status_intent(msg_norm: str) -> Dict[str, Optional[str]]:
764
  any(w in msg_norm for w in ("ticket", "incident", "servicenow", "snow"))
765
  or bool(re.search(r"\binc\d{5,}\b", msg_norm, flags=re.IGNORECASE))
766
  )
767
- # Disambiguation: if it's a domain status query and not clearly ticket/incident, do NOT route to ticket-status.
768
  if (not base_has_status) or (base_has_status and not has_ticket_marker and _is_domain_status_context(msg_norm)):
769
  return {}
770
-
771
  patterns = [
772
  r"(?:incident\s*id|incidentid|ticket\s*number|number)\s*[:=]?\s*(inc\d+)",
773
  r"(inc\d+)"
@@ -780,7 +522,6 @@ def _parse_ticket_status_intent(msg_norm: str) -> Dict[str, Optional[str]]:
780
  return {"number": val.upper() if val.lower().startswith("inc") else val}
781
  return {"number": None, "ask_number": True}
782
 
783
-
784
  def _is_resolution_ack_heuristic(msg_norm: str) -> bool:
785
  phrases = [
786
  "it is resolved", "resolved", "issue resolved", "problem resolved",
@@ -789,7 +530,6 @@ def _is_resolution_ack_heuristic(msg_norm: str) -> bool:
789
  ]
790
  return any(p in msg_norm for p in phrases)
791
 
792
-
793
  def _has_negation_resolved(msg_norm: str) -> bool:
794
  neg_phrases = [
795
  "not resolved", "issue not resolved", "still not working", "not working",
@@ -797,15 +537,9 @@ def _has_negation_resolved(msg_norm: str) -> bool:
797
  ]
798
  return any(p in msg_norm for p in neg_phrases)
799
 
800
-
801
  def _find_prereq_section_text(best_doc: str) -> str:
802
- """Return the prerequisites section text, trying common heading variants."""
803
  variants = [
804
- "Pre-Requisites",
805
- "Prerequisites",
806
- "Pre Requisites",
807
- "Pre-Requirements",
808
- "Requirements",
809
  ]
810
  for title in variants:
811
  txt = get_section_text(best_doc, title)
@@ -813,25 +547,23 @@ def _find_prereq_section_text(best_doc: str) -> str:
813
  return txt.strip()
814
  return ""
815
 
816
-
817
- # ------------------------------------------------------------------------------
818
  # Health
819
- # ------------------------------------------------------------------------------
820
  @app.get("/")
821
  async def health_check():
822
  return {"status": "ok"}
823
 
824
-
825
- # ------------------------------------------------------------------------------
826
  # Chat
827
- # ------------------------------------------------------------------------------
828
  @app.post("/chat")
829
  async def chat_with_ai(input_data: ChatInput):
830
  assist_followup: Optional[str] = None
831
  try:
832
  msg_norm = (input_data.user_message or "").lower().strip()
833
 
834
- # Yes/No handlers
835
  if msg_norm in ("yes", "y", "sure", "ok", "okay"):
836
  return {
837
  "bot_response": "Great! Tell me what you'd like to do next — check another ticket, create an incident, or describe your issue.",
@@ -850,11 +582,10 @@ async def chat_with_ai(input_data: ChatInput):
850
  "debug": {"intent": "end_conversation"},
851
  }
852
 
853
- # Resolution ack (auto incident + mark Resolved)
854
  is_llm_resolved = _classify_resolution_llm(input_data.user_message)
855
  if _has_negation_resolved(msg_norm):
856
  is_llm_resolved = False
857
-
858
  if (not _has_negation_resolved(msg_norm)) and (_is_resolution_ack_heuristic(msg_norm) or is_llm_resolved):
859
  try:
860
  short_desc, long_desc = _build_tracking_descriptions(input_data.last_issue, input_data.user_message)
@@ -906,12 +637,10 @@ async def chat_with_ai(input_data: ChatInput):
906
  "debug": {"intent": "resolved_ack", "exception": True},
907
  }
908
 
909
- # Incident intent
910
  if _is_incident_intent(msg_norm):
911
  return {
912
- "bot_response": (
913
- "Okay, let's create a ServiceNow incident."
914
- ),
915
  "status": (input_data.prev_status or "PARTIAL"),
916
  "context_found": False,
917
  "ask_resolved": False,
@@ -923,7 +652,7 @@ async def chat_with_ai(input_data: ChatInput):
923
  "debug": {"intent": "create_ticket"},
924
  }
925
 
926
- # Status intent (ticket/incident) — disambiguated
927
  status_intent = _parse_ticket_status_intent(msg_norm)
928
  if status_intent:
929
  if status_intent.get("ask_number"):
@@ -947,7 +676,6 @@ async def chat_with_ai(input_data: ChatInput):
947
  instance_url = os.getenv("SERVICENOW_INSTANCE_URL")
948
  if not instance_url:
949
  raise HTTPException(status_code=500, detail="SERVICENOW_INSTANCE_URL missing")
950
-
951
  headers = {"Authorization": f"Bearer {token}", "Accept": "application/json"}
952
  number = status_intent.get("number")
953
  url = f"{instance_url}/api/now/table/incident?number={number}"
@@ -955,12 +683,10 @@ async def chat_with_ai(input_data: ChatInput):
955
  data = response.json()
956
  lst = data.get("result", [])
957
  result = (lst or [{}])[0] if response.status_code == 200 else {}
958
-
959
  state_code = builtins.str(result.get("state", "unknown"))
960
  state_label = STATE_MAP.get(state_code, state_code)
961
  short = result.get("short_description", "")
962
  num = result.get("number", number or "unknown")
963
-
964
  return {
965
  "bot_response": (
966
  f"**Ticket:** {num}\n"
@@ -980,9 +706,7 @@ async def chat_with_ai(input_data: ChatInput):
980
  except Exception as e:
981
  raise HTTPException(status_code=500, detail=safe_str(e))
982
 
983
- # -----------------------------
984
- # Hybrid KB search
985
- # -----------------------------
986
  kb_results = hybrid_search_knowledge_base(input_data.user_message, top_k=10, alpha=0.6, beta=0.4)
987
  documents = kb_results.get("documents", [])
988
  metadatas = kb_results.get("metadatas", [])
@@ -1006,35 +730,27 @@ async def chat_with_ai(input_data: ChatInput):
1006
 
1007
  selected = items[:max(1, 2)]
1008
  context_raw = "\n\n---\n\n".join([s["text"] for s in selected]) if selected else ""
1009
-
1010
  filtered_text, filt_info = _filter_context_for_query(context_raw, input_data.user_message)
1011
  context = filtered_text
1012
  context_found = bool(context.strip())
1013
 
1014
- best_distance = (
1015
- min([d for d in distances if d is not None], default=None) if distances else None
1016
- )
1017
- best_combined = (
1018
- max([c for c in combined if c is not None], default=None) if combined else None
1019
- )
1020
 
1021
  detected_intent = kb_results.get("user_intent", "neutral")
1022
  best_doc = kb_results.get("best_doc")
1023
  top_meta = (metadatas or [{}])[0] if metadatas else {}
1024
  msg_low = (input_data.user_message or "").lower()
1025
-
1026
  GENERIC_ERROR_TERMS = ("error", "issue", "problem", "not working", "failed", "failure")
1027
  generic_error_signal = any(t in msg_low for t in GENERIC_ERROR_TERMS)
1028
 
1029
- # Query-based prereq nudge
1030
- PREREQ_TERMS = (
1031
- "pre req", "pre-requisite", "pre-requisites", "prerequisite",
1032
- "prerequisites", "pre requirement", "pre-requirements", "requirements"
1033
- )
1034
  if detected_intent == "neutral" and any(t in msg_low for t in PREREQ_TERMS):
1035
  detected_intent = "prereqs"
1036
 
1037
- # Permissions force
1038
  PERM_QUERY_TERMS = [
1039
  "permission", "permissions", "access", "access right", "authorization", "authorisation",
1040
  "role", "role access", "security", "security profile", "privilege",
@@ -1044,23 +760,13 @@ async def chat_with_ai(input_data: ChatInput):
1044
  if is_perm_query:
1045
  detected_intent = "errors"
1046
 
1047
- # Heading-aware prereq nudge
1048
  sec_title = ((top_meta or {}).get("section") or "").strip().lower()
1049
- PREREQ_HEADINGS = (
1050
- "pre-requisites", "prerequisites", "pre requisites",
1051
- "pre-requirements", "requirements"
1052
- )
1053
  if detected_intent == "neutral" and any(h in sec_title for h in PREREQ_HEADINGS):
1054
  detected_intent = "prereqs"
1055
 
1056
- # ---- FORCE STEPS for "what's next" / "next step" queries ----
1057
- try:
1058
- if _detect_next_intent(input_data.user_message):
1059
- detected_intent = "steps"
1060
- except Exception:
1061
- pass
1062
-
1063
- # Gating
1064
  def _contains_any(s: str, keywords: tuple) -> bool:
1065
  low = (s or "").lower()
1066
  return any(k in low for k in keywords)
@@ -1078,19 +784,16 @@ async def chat_with_ai(input_data: ChatInput):
1078
  "error", "issue", "fail", "failed", "not working", "locked", "mismatch",
1079
  "access", "permission", "status"
1080
  )
1081
-
1082
  matched_count = int(filt_info.get("matched_count") or 0)
1083
  filter_mode = (filt_info.get("mode") or "").lower()
1084
  has_any_action_or_error = _contains_any(msg_low, ACTION_OR_ERROR_TERMS)
1085
  mentions_domain = _contains_any(msg_low, DOMAIN_TERMS)
1086
-
1087
  short_query = len((input_data.user_message or "").split()) <= 4
1088
  gate_combined_ok = 0.60 if short_query else 0.55
1089
  combined_ok = (best_combined is not None and best_combined >= gate_combined_ok)
1090
  weak_domain_only = (mentions_domain and not has_any_action_or_error)
1091
  low_context_hit = (matched_count < 2 and filter_mode in ("concise", "exact"))
1092
-
1093
- strong_steps_bypass = True # next-step override already set steps; allow
1094
  strong_error_signal = len(_detect_error_families(msg_low)) > 0
1095
 
1096
  if (weak_domain_only or (low_context_hit and not combined_ok)) \
@@ -1119,7 +822,7 @@ async def chat_with_ai(input_data: ChatInput):
1119
  },
1120
  }
1121
 
1122
- # Build SOP context if allowed
1123
  escalation_line: Optional[str] = None
1124
  full_errors: Optional[str] = None
1125
  next_step_applied = False
@@ -1127,25 +830,26 @@ async def chat_with_ai(input_data: ChatInput):
1127
  context_preformatted = False
1128
 
1129
  if best_doc and detected_intent == "steps":
 
1130
  sec = (top_meta or {}).get("section")
1131
  if sec:
1132
- # Prefer the exact section of the top hit
1133
- full_steps = get_section_text(best_doc, sec)
1134
  else:
1135
- # Fallback to all steps (rare)
1136
- full_steps = get_best_steps_section_text(best_doc)
1137
-
1138
  if full_steps:
1139
  numbered_full = _ensure_numbering(full_steps)
 
 
1140
  raw_actions = set((kb_results.get("actions") or []))
1141
- msg_low = (input_data.user_message or "").lower()
1142
- # Fallbacks if extractor missed it
1143
- if not raw_actions and ("creation" in msg_low or "create" in msg_low or "set up" in msg_low or "setup" in msg_low):
1144
  raw_actions = {"create"}
1145
- elif not raw_actions and ("update" in msg_low or "modify" in msg_low or "edit" in msg_low or "change" in msg_low):
1146
  raw_actions = {"update"}
1147
- elif not raw_actions and ("delete" in msg_low or "remove" in msg_low or "cancel" in msg_low or "void" in msg_low):
1148
  raw_actions = {"delete"}
 
1149
  wanted, exclude = set(), set()
1150
  if "create" in raw_actions and not ({"update", "delete"} & raw_actions):
1151
  wanted, exclude = {"create"}, {"update", "delete"}
@@ -1153,19 +857,13 @@ async def chat_with_ai(input_data: ChatInput):
1153
  wanted, exclude = {"update"}, {"create", "delete"}
1154
  elif "delete" in raw_actions and not ({"create", "update"} & raw_actions):
1155
  wanted, exclude = {"delete"}, {"create", "update"}
 
1156
  if wanted or exclude:
1157
- # Sentence-level filtering for mixed paragraphs
1158
- numbered_full = _filter_sentences_by_actions(
1159
- numbered_full, wanted=wanted, exclude=exclude
1160
- )
1161
-
1162
-
1163
- next_only = _resolve_next_steps(
1164
- input_data.user_message,
1165
- numbered_full,
1166
- max_next=6,
1167
- min_score=0.35
1168
- )
1169
  if next_only is not None:
1170
  if len(next_only) == 0:
1171
  context = "You are at the final step of this SOP. No further steps."
@@ -1174,12 +872,13 @@ async def chat_with_ai(input_data: ChatInput):
1174
  context_preformatted = True
1175
  else:
1176
  context = _format_steps_as_numbered(next_only)
 
1177
  next_step_applied = True
1178
  next_step_info = {"count": len(next_only)}
1179
  context_preformatted = True
1180
  else:
1181
- context = full_steps
1182
- context_preformatted = False
1183
 
1184
  # clear filter info for debug clarity
1185
  filt_info = {'mode': None, 'matched_count': None, 'all_sentences': None}
@@ -1202,10 +901,10 @@ async def chat_with_ai(input_data: ChatInput):
1202
  if re.match(r"^\s*[\-\*\u2022]\s*", ln) or (":" in ln)
1203
  ]
1204
  context = "\n".join(error_bullets[:8]).strip()
1205
- assist_followup = (
1206
- "Please tell me which error above matches your screen (paste the exact text), "
1207
- "or share a screenshot. I can guide you further or raise a ServiceNow ticket."
1208
- )
1209
  escalation_line = _extract_escalation_line(full_errors)
1210
 
1211
  elif best_doc and detected_intent == "prereqs":
@@ -1213,16 +912,14 @@ async def chat_with_ai(input_data: ChatInput):
1213
  if full_prereqs:
1214
  context = full_prereqs.strip()
1215
  context_found = True
1216
-
1217
  else:
1218
- # Neutral or other intents: use filtered context
1219
- context = filtered_text
1220
 
1221
- # Language hint & paraphrase (for errors only)
1222
  language_hint = _detect_language_hint(input_data.user_message)
1223
  lang_line = f"Respond in {language_hint}." if language_hint else "Respond in a clear, polite tone."
1224
  use_gemini = (detected_intent == "errors")
1225
-
1226
  enhanced_prompt = f"""You are a helpful support assistant. Rewrite the provided context ONLY into clear, user-friendly guidance.
1227
  - Do not add any information that is not present in the context.
1228
  - If the content is an error/access/permission note, paraphrase it into a helpful sentence users can understand.
@@ -1233,7 +930,6 @@ async def chat_with_ai(input_data: ChatInput):
1233
  {input_data.user_message}
1234
  ### Output
1235
  Return ONLY the rewritten guidance."""
1236
-
1237
  headers = {"Content-Type": "application/json"}
1238
  payload = {"contents": [{"parts": [{"text": enhanced_prompt}]}]}
1239
  bot_text = ""
@@ -1254,7 +950,7 @@ Return ONLY the rewritten guidance."""
1254
  except Exception:
1255
  bot_text, http_code = "", 0
1256
 
1257
- # Deterministic local formatting
1258
  if detected_intent == "steps":
1259
  if context_preformatted:
1260
  bot_text = context
@@ -1268,7 +964,7 @@ Return ONLY the rewritten guidance."""
1268
  else:
1269
  bot_text = context
1270
 
1271
- # Append escalation if explicitly requested even in steps mode
1272
  needs_escalation = (" escalate" in msg_norm) or ("escalation" in msg_norm)
1273
  if needs_escalation and best_doc:
1274
  esc_text = get_escalation_text(best_doc)
@@ -1278,7 +974,7 @@ Return ONLY the rewritten guidance."""
1278
  if line:
1279
  bot_text = (bot_text or "").rstrip() + "\n\n" + line
1280
 
1281
- # Non-empty guarantee
1282
  if not (bot_text or "").strip():
1283
  if context.strip():
1284
  bot_text = context.strip()
@@ -1291,7 +987,6 @@ Return ONLY the rewritten guidance."""
1291
  short_query = len((input_data.user_message or "").split()) <= 4
1292
  gate_combined_ok = 0.60 if short_query else 0.55
1293
  status = "OK" if (best_combined is not None and best_combined >= gate_combined_ok) else "PARTIAL"
1294
-
1295
  lower = (bot_text or "").lower()
1296
  if ("partial" in lower) or ("may be partial" in lower) or ("closest" in lower) or ("may not fully" in lower):
1297
  status = "PARTIAL"
@@ -1326,10 +1021,9 @@ Return ONLY the rewritten guidance."""
1326
  except Exception as e:
1327
  raise HTTPException(status_code=500, detail=safe_str(e))
1328
 
1329
-
1330
- # ------------------------------------------------------------------------------
1331
  # Ticket description generation
1332
- # ------------------------------------------------------------------------------
1333
  @app.post("/generate_ticket_desc")
1334
  async def generate_ticket_desc_ep(input_data: TicketDescInput):
1335
  try:
@@ -1349,16 +1043,13 @@ async def generate_ticket_desc_ep(input_data: TicketDescInput):
1349
  data = resp.json()
1350
  except Exception:
1351
  return {"ShortDescription": "", "DetailedDescription": "", "error": "Gemini returned non-JSON"}
1352
-
1353
  try:
1354
  text = data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "").strip()
1355
  except Exception:
1356
  return {"ShortDescription": "", "DetailedDescription": "", "error": "Gemini parsing failed"}
1357
-
1358
  if text.startswith("```"):
1359
  lines = [ln for ln in text.splitlines() if not ln.strip().startswith("```")]
1360
  text = "\n".join(lines).strip()
1361
-
1362
  try:
1363
  ticket_json = json.loads(text)
1364
  return {
@@ -1367,14 +1058,12 @@ async def generate_ticket_desc_ep(input_data: TicketDescInput):
1367
  }
1368
  except Exception:
1369
  return {"ShortDescription": "", "DetailedDescription": "", "error": "Invalid JSON returned"}
1370
-
1371
  except Exception as e:
1372
  raise HTTPException(status_code=500, detail=safe_str(e))
1373
 
1374
-
1375
- # ------------------------------------------------------------------------------
1376
  # Incident status
1377
- # ------------------------------------------------------------------------------
1378
  @app.post("/incident_status")
1379
  async def incident_status(input_data: TicketStatusInput):
1380
  try:
@@ -1382,9 +1071,7 @@ async def incident_status(input_data: TicketStatusInput):
1382
  instance_url = os.getenv("SERVICENOW_INSTANCE_URL")
1383
  if not instance_url:
1384
  raise HTTPException(status_code=500, detail="SERVICENOW_INSTANCE_URL missing")
1385
-
1386
  headers = {"Authorization": f"Bearer {token}", "Accept": "application/json"}
1387
-
1388
  if input_data.sys_id:
1389
  url = f"{instance_url}/api/now/table/incident/{input_data.sys_id}"
1390
  response = requests.get(url, headers=headers, verify=VERIFY_SSL, timeout=25)
@@ -1398,12 +1085,10 @@ async def incident_status(input_data: TicketStatusInput):
1398
  result = (lst or [{}])[0] if response.status_code == 200 else {}
1399
  else:
1400
  raise HTTPException(status_code=400, detail="Provide IncidentID (number) or sys_id")
1401
-
1402
  state_code = builtins.str(result.get("state", "unknown"))
1403
  state_label = STATE_MAP.get(state_code, state_code)
1404
  short = result.get("short_description", "")
1405
  number = result.get("number", input_data.number or "unknown")
1406
-
1407
  return {
1408
  "bot_response": (
1409
  f"**Ticket:** {number} \n"
@@ -1415,14 +1100,12 @@ async def incident_status(input_data: TicketStatusInput):
1415
  "persist": True,
1416
  "debug": "Incident status fetched",
1417
  }
1418
-
1419
  except Exception as e:
1420
  raise HTTPException(status_code=500, detail=safe_str(e))
1421
 
1422
-
1423
- # ------------------------------------------------------------------------------
1424
  # Incident creation
1425
- # ------------------------------------------------------------------------------
1426
  def _classify_resolution_llm(user_message: str) -> bool:
1427
  if not GEMINI_API_KEY:
1428
  return False
@@ -1444,7 +1127,6 @@ Message: {user_message}"""
1444
  except Exception:
1445
  return False
1446
 
1447
-
1448
  def _set_incident_resolved(sys_id: str) -> bool:
1449
  try:
1450
  token = get_valid_token()
@@ -1452,21 +1134,18 @@ def _set_incident_resolved(sys_id: str) -> bool:
1452
  if not instance_url:
1453
  print("[SN PATCH resolve] missing SERVICENOW_INSTANCE_URL")
1454
  return False
1455
-
1456
  headers = {
1457
  "Authorization": f"Bearer {token}",
1458
  "Accept": "application/json",
1459
  "Content-Type": "application/json",
1460
  }
1461
  url = f"{instance_url}/api/now/table/incident/{sys_id}"
1462
-
1463
  close_code_val = os.getenv("SERVICENOW_CLOSE_CODE", "Solution provided")
1464
  close_notes_val = os.getenv("SERVICENOW_RESOLUTION_NOTES", "Issue resolved, user confirmed")
1465
  caller_sysid = os.getenv("SERVICENOW_CALLER_SYSID")
1466
  resolved_by_sysid = os.getenv("SERVICENOW_RESOLVED_BY_SYSID")
1467
  assign_group = os.getenv("SERVICENOW_ASSIGNMENT_GROUP_SYSID")
1468
  require_progress = os.getenv("SERVICENOW_REQUIRE_IN_PROGRESS_FIRST", "false").lower() in ("1", "true", "yes")
1469
-
1470
  if require_progress:
1471
  try:
1472
  resp1 = requests.patch(url, headers=headers, json={"state": "2"}, verify=VERIFY_SSL, timeout=25)
@@ -1511,7 +1190,7 @@ def _set_incident_resolved(sys_id: str) -> bool:
1511
  notes_field = os.getenv("SERVICENOW_RESOLUTION_NOTES_FIELD", "close_notes")
1512
  payload_C = clean({
1513
  "state": "6",
1514
- code_field: close_notes_val, # adjust if custom fields
1515
  notes_field: close_notes_val,
1516
  "caller_id": caller_sysid,
1517
  "resolved_at": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
@@ -1523,13 +1202,11 @@ def _set_incident_resolved(sys_id: str) -> bool:
1523
  if respC.status_code in (200, 204):
1524
  return True
1525
  print(f"[SN PATCH resolve C] status={respC.status_code} body={respC.text[:500]}")
1526
-
1527
  return False
1528
  except Exception as e:
1529
  print(f"[SN PATCH resolve] exception={safe_str(e)}")
1530
  return False
1531
 
1532
-
1533
  @app.post("/incident")
1534
  async def raise_incident(input_data: IncidentInput):
1535
  try:
 
1
 
2
+ # main.py
3
  import os
4
  import json
5
  import re
 
13
  from fastapi.middleware.cors import CORSMiddleware
14
  from pydantic import BaseModel
15
  from dotenv import load_dotenv
16
+ from difflib import SequenceMatcher
17
 
18
+ # KB services
19
  from services.kb_creation import (
20
  collection,
21
  ingest_documents,
 
30
  from services.login import router as login_router
31
  from services.generate_ticket import get_valid_token, create_incident
32
 
33
+ # ---------------------------------------------------------------------
 
34
  # Environment
35
+ # ---------------------------------------------------------------------
36
  load_dotenv()
37
  VERIFY_SSL = os.getenv("SERVICENOW_SSL_VERIFY", "true").lower() in ("1", "true", "yes")
38
  GEMINI_SSL_VERIFY = os.getenv("GEMINI_SSL_VERIFY", "true").lower() in ("1", "true", "yes")
 
43
  )
44
  os.environ["POSTHOG_DISABLED"] = "true"
45
 
 
46
  def safe_str(e: Any) -> str:
47
  try:
48
  return builtins.str(e)
49
  except Exception:
50
  return "<error stringify failed>"
51
 
52
+ # ---------------------------------------------------------------------
 
53
  # App / Lifespan
54
+ # ---------------------------------------------------------------------
55
  @asynccontextmanager
56
  async def lifespan(app: FastAPI):
57
  try:
 
65
  print(f"[KB] ingestion failed: {safe_str(e)}")
66
  yield
67
 
 
68
  app = FastAPI(lifespan=lifespan)
69
  app.include_router(login_router)
70
 
71
+ # CORS
72
  origins = [
73
  "https://chatbotnova-chatbot-frontend.hf.space",
74
+ # "http://localhost:5173", # local dev if needed
75
  ]
76
  app.add_middleware(
77
  CORSMiddleware,
 
81
  allow_headers=["*"],
82
  )
83
 
84
+ # ---------------------------------------------------------------------
 
85
  # Models
86
+ # ---------------------------------------------------------------------
87
  # Request body accepted by the POST /chat endpoint (chat_with_ai).
  class ChatInput(BaseModel):
      # Free-text message typed by the user; drives intent detection and KB search.
      user_message: str
      # Status carried over from the previous turn; echoed back (defaulting to
      # "PARTIAL") when the user asks to create a ticket.
      prev_status: Optional[str] = None
      # Text of the issue under discussion; used to build the tracking incident
      # descriptions when the user acknowledges a resolution.
      last_issue: Optional[str] = None
91
 
 
92
  # Request body accepted by the POST /incident endpoint (raise_incident).
  class IncidentInput(BaseModel):
      short_description: str
      description: str
      # NOTE(review): presumably asks the endpoint to resolve the incident right
      # after creating it; the handler body is not visible here, so confirm.
      mark_resolved: Optional[bool] = False
96
 
 
97
  # Request body accepted by the POST /generate_ticket_desc endpoint.
  class TicketDescInput(BaseModel):
      # NOTE(review): presumably the user's issue text to turn into ticket
      # descriptions; the prompt construction is not visible here, so confirm.
      issue: str
99
 
 
100
  # Request body accepted by the POST /incident_status endpoint. The handler
  # rejects the request with HTTP 400 when it can use neither identifier.
  class TicketStatusInput(BaseModel):
      # ServiceNow record sys_id; when set, the incident is fetched directly by it.
      sys_id: Optional[str] = None
      # Incident number; also used as the displayed number when the lookup
      # result carries none.
      number: Optional[str] = None
103
 
 
104
  STATE_MAP = {
105
  "1": "New",
106
  "2": "In Progress",
 
110
  "8": "Canceled",
111
  }
112
 
113
+ # ---------------------------------------------------------------------
114
+ # Generic helpers (shared)
115
+ # ---------------------------------------------------------------------
 
116
  # Read once at import time from the NUMBERING_STYLE environment variable.
  # NOTE(review): no consumer of this value is visible in this part of the file.
  NUMBERING_STYLE = os.getenv("NUMBERING_STYLE", "digit").lower() # 'digit' or 'step'

  # Domain nouns that are substring-matched against the lower-cased user message
  # by _is_domain_status_context, so that a "status" question about one of these
  # objects is not mistaken for a ServiceNow ticket-status request.
  DOMAIN_STATUS_TERMS = (
      "shipment", "order", "load", "trailer", "wave",
      "inventory", "putaway", "receiving", "appointment",
      "dock", "door", "manifest", "pallet", "container",
      "asn", "grn", "pick", "picking"
  )
 
123
  ERROR_FAMILY_SYNS = {
124
  "NOT_FOUND": (
125
  "not found", "missing", "does not exist", "doesn't exist",
 
149
  ),
150
  }
151
 
 
152
  def _detect_error_families(msg: str) -> list:
153
  low = (msg or "").lower()
154
  low_norm = re.sub(r"[^\w\s]", " ", low)
 
159
  fams.append(fam)
160
  return fams
161
 
 
162
  def _is_domain_status_context(msg_norm: str) -> bool:
163
  if "status locked" in msg_norm or "locked status" in msg_norm:
164
  return True
165
  return any(term in msg_norm for term in DOMAIN_STATUS_TERMS)
166
 
 
167
  def _normalize_lines(text: str) -> List[str]:
168
  raw = (text or "")
169
  try:
 
171
  except Exception:
172
  return [raw.strip()] if raw.strip() else []
173
 
174
+ # ---------------- Action filters for steps (create/update/delete) ----------------
175
+ def _filter_numbered_steps_by_actions(numbered_text: str, wanted: set[str], exclude: set[str]) -> str:
 
 
 
 
 
 
 
 
176
  ACTION_SYNONYMS = {
177
  "create": ("create", "creation", "add", "new", "generate"),
178
  "update": ("update", "modify", "change", "edit"),
179
  "delete": ("delete", "remove"),
180
  "navigate": ("navigate", "go to", "open"),
181
  }
 
182
  def _has_any(line: str, keys: set[str]) -> bool:
183
  low = (line or "").lower()
184
  for k in keys:
 
189
 
190
  out_lines = []
191
  for ln in (numbered_text or "").splitlines():
 
192
  if _has_any(ln, exclude):
193
  continue
 
194
  if wanted:
195
  if _has_any(ln, wanted):
196
  out_lines.append(ln)
197
  else:
 
198
  out_lines.append(ln)
 
 
199
  return "\n".join(out_lines).strip() or (numbered_text or "").strip()
200
 
201
+ # ---------------- Small utilities used by next-step & filtering ----------------
202
+ def _dedupe_lines(text: str) -> str:
203
+ seen, out = set(), []
204
+ for ln in (text or "").splitlines():
205
+ key = re.sub(r"\s+", " ", (ln or "").strip().lower())
206
+ if key and key not in seen:
207
+ out.append(ln)
208
+ seen.add(key)
209
+ return "\n".join(out).strip()
210
+
211
+ def _split_sentences(block: str) -> list:
212
+ parts = [t.strip() for t in re.split(r"(?<=[.!?])\s+", block or "") if t.strip()]
213
+ return parts if parts else ([block.strip()] if (block or "").strip() else [])
214
+
215
+ # ------------- Numbering + text normalization used elsewhere ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  def _ensure_numbering(text: str) -> str:
 
 
 
 
217
  text = re.sub(r"[\u2060\u200B]", "", text or "")
218
  lines = [ln.strip() for ln in (text or "").splitlines() if ln and ln.strip()]
219
  if not lines:
220
  return text or ""
 
221
  para = " ".join(lines).strip()
222
  if not para:
223
  return ""
224
+ para_clean = re.sub(r"(?:\b\d+\s*[.\)])\s+", "\n\n\n", para)
225
+ para_clean = re.sub(r"(?:[\u2460-\u2473]\s+)", "\n\n\n", para_clean)
226
+ para_clean = re.sub(r"(?i)\bstep\s*\d+\s*:\s*", "\n\n\n", para_clean)
 
 
 
227
  segments = [seg.strip() for seg in para_clean.split("\n\n\n") if seg.strip()]
 
228
  if len(segments) < 2:
229
  tmp = [ln.strip() for ln in para.splitlines() if ln.strip()]
230
  segments = tmp if len(tmp) > 1 else [seg.strip() for seg in re.split(r"(?<=[.!?])\s+", para) if seg.strip()]
 
232
  def strip_prefix_any(s: str) -> str:
233
  return re.sub(
234
  r"^\s*(?:"
235
+ r"(?:\d+\s*[.\)])|" # 1. / 1)
236
+ r"(?i:step\s*\d+:?)|" # Step 1:
237
+ r"(?:[-*\u2022])|" # bullets
238
+ r"(?:[\u2460-\u2473])" # circled digits
239
+ r")\s*", "", (s or "").strip()
 
 
 
240
  )
 
241
  clean_segments = [strip_prefix_any(seg) for seg in segments if seg.strip()]
 
242
  circled = {
243
  1: "\u2460", 2: "\u2461", 3: "\u2462", 4: "\u2463", 5: "\u2464",
244
  6: "\u2465", 7: "\u2466", 8: "\u2467", 9: "\u2468", 10: "\u2469",
245
  11: "\u246a", 12: "\u246b", 13: "\u246c", 14: "\u246d", 15: "\u246e",
246
  16: "\u246f", 17: "\u2470", 18: "\u2471", 19: "\u2472", 20: "\u2473"
247
  }
 
248
  out = []
249
  for idx, seg in enumerate(clean_segments, start=1):
250
  marker = circled.get(idx, f"{idx})")
251
  out.append(f"{marker} {seg}")
 
252
  return "\n".join(out)
253
 
254
  def _norm_text(s: str) -> str:
 
 
 
 
255
  s = (s or "").lower()
256
  s = re.sub(r"[^\w\s]", " ", s)
257
  s = re.sub(r"\s+", " ", s).strip()
258
  if not s:
259
  return s
 
 
260
  toks = s.split()
261
  stemmed = []
262
  for t in toks:
 
263
  if len(t) > 3 and t.endswith("s"):
264
  t = t[:-1]
 
265
  if len(t) > 4 and t.endswith("ed"):
266
  t = t[:-2]
267
  if len(t) > 5 and t.endswith("ing"):
 
269
  stemmed.append(t)
270
  return " ".join(stemmed).strip()
271
 
 
272
  def _split_sop_into_steps(numbered_text: str) -> list:
 
 
 
 
273
  lines = [ln.strip() for ln in (numbered_text or "").splitlines() if ln.strip()]
274
  steps = []
275
  for ln in lines:
276
  cleaned = re.sub(
277
+ r"^\s*(?:[\u2460-\u2473]|\d+[.)]|[-*•])\s*",
278
  "",
279
  ln
280
  )
 
282
  steps.append(cleaned)
283
  return steps
284
 
285
+ # ---------------- Similarity for anchor-based next steps ----------------
286
def _similarity(a: str, b: str) -> float:
    """Score how alike two texts are, in the range 0.0 to 1.0.

    Both texts go through _norm_text first. The score is a weighted blend of
    token-set Jaccard overlap (0.45), bigram Jaccard overlap (0.30) and the
    SequenceMatcher character ratio (0.35), capped at 1.0.
    """
    left, right = _norm_text(a), _norm_text(b)
    left_tokens, right_tokens = left.split(), right.split()

    def _jaccard(x: set, y: set) -> float:
        # "or 1" keeps two empty sets from dividing by zero (score 0.0).
        return len(x & y) / (len(x | y) or 1)

    def _bigrams(tokens: list) -> set:
        # Zipping a list with its own tail yields nothing for 0 or 1 tokens.
        return {" ".join(pair) for pair in zip(tokens, tokens[1:])}

    token_score = _jaccard(set(left_tokens), set(right_tokens))
    bigram_score = _jaccard(_bigrams(left_tokens), _bigrams(right_tokens))
    char_score = SequenceMatcher(None, left, right).ratio()
    return min(1.0, 0.45*token_score + 0.30*bigram_score + 0.35*char_score)
300
+
301
def _extract_anchor_from_query(msg: str) -> dict:
    """Pull the "where I am now" phrase out of a next-step style question.

    The raw message is split on '?', '.', ',', ';', ':', '-' and newlines. The
    last fragment is the anchor, unless that fragment is itself a follow-up
    cue ("what next", "then", ...) and an earlier fragment exists, in which
    case the fragment just before it is used.

    Cues are matched as whole words against the _norm_text form of the text.
    Plain substring matching would fire on words that merely contain a cue,
    e.g. "then" inside "authentication".

    Returns:
        {"anchor": str, "has_followup": bool}. ``has_followup`` reports
        whether any cue occurs anywhere in the message. ``anchor`` is the
        stripped raw message when no fragment survives the split.
    """
    FOLLOWUP_CUES = ("what next", "what is next", "what to do", "then", "after that", "next")

    def _has_cue(normalized: str) -> bool:
        # _norm_text output is single-space separated tokens, so padding both
        # sides with a space turns substring search into whole-word search.
        padded = f" {normalized} "
        return any(f" {cue} " in padded for cue in FOLLOWUP_CUES)

    raw = (msg or "").strip()
    has_followup = _has_cue(_norm_text(raw))

    parts = [p.strip() for p in re.split(r"[?.,;:\-\n]+", raw) if p.strip()]
    if not parts:
        return {"anchor": raw, "has_followup": has_followup}

    anchor = parts[-1]
    if len(parts) >= 2 and _has_cue(_norm_text(anchor)):
        # The tail is only the question ("..., what next?"); the user's
        # position is described by the fragment before it.
        anchor = parts[-2]
    return {"anchor": anchor.strip(), "has_followup": has_followup}
 
 
 
 
 
 
 
316
 
317
def _anchor_next_steps(user_message: str, numbered_text: str, max_next: int = 8) -> list | None:
    """Return the SOP steps that come after the step the user refers to.

    The anchor phrase from ``user_message`` is compared with every step, both
    as a whole line and sentence by sentence. The best-scoring step is chosen,
    with ties going to the later step. It is accepted when one of its
    sentences literally contains the anchor or is contained in it (compared
    with all non-word characters removed), or when its similarity clears the
    thresholds below.

    Args:
        user_message: The user's question.
        numbered_text: The SOP steps, one per line.
        max_next: Upper bound on the number of following steps returned.

    Returns:
        None when there are no steps, no anchor, or no confident match.
        An empty list when the matched step is the last one.
        Otherwise the following steps, de-duplicated, at most ``max_next``.
    """
    steps = _split_sop_into_steps(numbered_text)
    if not steps:
        return None

    info = _extract_anchor_from_query(user_message)
    anchor = info.get("anchor", "").strip()
    if not anchor:
        return None
    anchor_norm = _norm_text(anchor)
    has_followup = bool(info.get("has_followup"))
    # Loop-invariant: computed once instead of once per sentence.
    anchor_flat = re.sub(r"\W+", "", anchor_norm)

    def _score_step(step_line: str) -> tuple:
        """Return (best similarity, literal containment found) for one step."""
        score = _similarity(anchor, step_line)
        literal_hit = False
        for sentence in _split_sentences(step_line):
            score = max(score, _similarity(anchor, sentence))
            sentence_flat = re.sub(r"\W+", "", _norm_text(sentence))
            # Both sides must be non-empty: "" is a substring of every string,
            # so a punctuation-only sentence would otherwise always "match".
            if anchor_flat and sentence_flat and (
                anchor_flat in sentence_flat or sentence_flat in anchor_flat
            ):
                literal_hit = True
        return score, literal_hit

    scored = [(idx, *_score_step(step_line)) for idx, step_line in enumerate(steps)]
    # Highest score wins; on equal scores the later step is preferred.
    best_idx, best_score, best_literal = max(scored, key=lambda t: (t[1], t[0]))

    if not best_literal:
        tok_count = sum(1 for t in anchor_norm.split() if len(t) > 1)
        # An explicit follow-up cue lowers the bar slightly.
        base_ok = best_score >= (0.50 if has_followup else 0.55)
        # Longer anchors are accepted at a lower score.
        len_ok = (best_score >= 0.40) and (tok_count >= 3)
        if not (base_ok or len_ok):
            return None

    start = best_idx + 1
    if start >= len(steps):
        return []
    end = min(start + max_next, len(steps))
    next_steps = steps[start:end]
    return [ln for ln in _dedupe_lines("\n".join(next_steps)).splitlines() if ln.strip()]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
 
362
+ # ---------------- Context filtering (neutral/errors rendering) ----------------
363
  def _filter_context_for_query(context: str, query: str) -> Tuple[str, Dict[str, Any]]:
364
  STRICT_OVERLAP = 3
365
  MAX_SENTENCES_STRICT = 4
 
371
  t = re.sub(r"\s+", " ", t).strip()
372
  return t
373
 
374
+ def _split_sents(ctx: str) -> List[str]:
375
  raw_sents = re.split(r"(?<=[.!?])\s+", ctx or "")
376
  return [s.strip() for s in raw_sents if s and len(s.strip()) > 2]
377
 
378
  ctx = (context or "").strip()
379
  if not ctx or not query:
380
  return ctx, {'mode': 'concise', 'matched_count': 0, 'all_sentences': 0}
 
381
  q_norm = _norm(query)
382
  q_terms = [t for t in q_norm.split() if len(t) > 2]
383
  if not q_terms:
384
  return ctx, {'mode': 'concise', 'matched_count': 0, 'all_sentences': 0}
385
 
386
+ sentences = _split_sents(ctx)
387
  matched_exact, matched_any = [], []
388
  for s in sentences:
389
  s_norm = _norm(s)
 
396
 
397
  if matched_exact:
398
  kept = matched_exact[:MAX_SENTENCES_STRICT]
399
+ return _dedupe_lines("\n".join(kept).strip()), {
400
+ 'mode': 'exact', 'matched_count': len(kept), 'all_sentences': len(sentences)
401
+ }
402
  if matched_any:
403
  kept = matched_any[:MAX_SENTENCES_CONCISE]
404
+ return _dedupe_lines("\n".join(kept).strip()), {
405
+ 'mode': 'concise', 'matched_count': len(kept), 'all_sentences': len(sentences)
406
+ }
407
  kept = sentences[:MAX_SENTENCES_CONCISE]
408
+ return _dedupe_lines("\n".join(kept).strip()), {
409
+ 'mode': 'concise', 'matched_count': 0, 'all_sentences': len(sentences)
410
+ }
411
 
412
  def _extract_errors_only(text: str, max_lines: int = 12) -> str:
413
  kept: List[str] = []
 
418
  break
419
  return "\n".join(kept).strip() if kept else (text or "").strip()
420
 
 
421
  def _filter_permission_lines(text: str, max_lines: int = 6) -> str:
422
  PERM_SYNONYMS = (
423
  "permission", "permissions", "access", "authorization", "authorisation",
 
432
  break
433
  return "\n".join(kept).strip() if kept else (text or "").strip()
434
 
 
435
  def _extract_escalation_line(text: str) -> Optional[str]:
436
  if not text:
437
  return None
438
  lines = _normalize_lines(text)
439
  if not lines:
440
  return None
 
441
  start_idx = None
442
  for i, ln in enumerate(lines):
443
  low = ln.lower()
444
  if "escalation" in low or "escalation path" in low or "escalate" in low:
445
  start_idx = i
446
  break
 
447
  block: List[str] = []
448
  if start_idx is not None:
449
  for j in range(start_idx, min(len(lines), start_idx + 6)):
 
452
  block.append(lines[j].strip())
453
  else:
454
  block = [ln.strip() for ln in lines if ("->" in ln or "→" in ln)]
 
455
  if not block:
456
  return None
 
457
  text_block = " ".join(block)
458
  m = re.search(r"escalation[^:]*:\s*(.+)", text_block, flags=re.IGNORECASE)
459
  path = m.group(1).strip() if m else None
 
460
  if not path:
461
  arrow_lines = [ln for ln in block if ("->" in ln or "→" in ln)]
462
  if arrow_lines:
463
  path = arrow_lines[0]
464
  if not path:
465
+ m2 = re.search(r"(operator.*?administrator.*operator.*)", text_block, flags=re.IGNORECASE)
466
  path = m2.group(1).strip() if m2 else None
467
  if not path:
468
  return None
 
469
  path = path.replace("->", "→").strip()
470
  path = re.sub(r"^(?i:escalation\s*path)\s*:\s*", "", path).strip()
471
  return f"If you want to escalate the issue, follow: {path}"
472
 
 
473
  def _detect_language_hint(msg: str) -> Optional[str]:
474
  if re.search(r"[\u0B80-\u0BFF]", msg or ""): # Tamil
475
  return "Tamil"
 
477
  return "Hindi"
478
  return None
479
 
 
480
  def _build_clarifying_message() -> str:
481
+ return ("It seems the issue isn’t resolved yet. Would you like to share a few details so I can check further, "
482
+ "or should I raise a ServiceNow ticket for you?")
 
 
 
483
 
484
  def _build_tracking_descriptions(issue_text: str, resolved_text: str) -> Tuple[str, str]:
485
  issue = (issue_text or "").strip()
 
492
  ).strip()
493
  return short_desc, long_desc
494
 
 
495
  def _is_incident_intent(msg_norm: str) -> bool:
496
  intent_phrases = [
497
  "create ticket", "create a ticket", "raise ticket", "raise a ticket", "open ticket", "open a ticket",
 
501
  ]
502
  return any(p in msg_norm for p in intent_phrases)
503
 
 
504
  def _parse_ticket_status_intent(msg_norm: str) -> Dict[str, Optional[str]]:
505
  status_keywords = ["status", "ticket status", "incident status", "check status", "check ticket status", "check incident status"]
506
  base_has_status = any(k in msg_norm for k in status_keywords)
 
508
  any(w in msg_norm for w in ("ticket", "incident", "servicenow", "snow"))
509
  or bool(re.search(r"\binc\d{5,}\b", msg_norm, flags=re.IGNORECASE))
510
  )
 
511
  if (not base_has_status) or (base_has_status and not has_ticket_marker and _is_domain_status_context(msg_norm)):
512
  return {}
 
513
  patterns = [
514
  r"(?:incident\s*id|incidentid|ticket\s*number|number)\s*[:=]?\s*(inc\d+)",
515
  r"(inc\d+)"
 
522
  return {"number": val.upper() if val.lower().startswith("inc") else val}
523
  return {"number": None, "ask_number": True}
524
 
 
525
  def _is_resolution_ack_heuristic(msg_norm: str) -> bool:
526
  phrases = [
527
  "it is resolved", "resolved", "issue resolved", "problem resolved",
 
530
  ]
531
  return any(p in msg_norm for p in phrases)
532
 
 
533
  def _has_negation_resolved(msg_norm: str) -> bool:
534
  neg_phrases = [
535
  "not resolved", "issue not resolved", "still not working", "not working",
 
537
  ]
538
  return any(p in msg_norm for p in neg_phrases)
539
 
 
540
  def _find_prereq_section_text(best_doc: str) -> str:
 
541
  variants = [
542
+ "Pre-Requisites", "Prerequisites", "Pre Requisites", "Pre-Requirements", "Requirements",
 
 
 
 
543
  ]
544
  for title in variants:
545
  txt = get_section_text(best_doc, title)
 
547
  return txt.strip()
548
  return ""
549
 
550
+ # ---------------------------------------------------------------------
 
551
  # Health
552
+ # ---------------------------------------------------------------------
553
  @app.get("/")
554
  async def health_check():
555
  return {"status": "ok"}
556
 
557
+ # ---------------------------------------------------------------------
 
558
  # Chat
559
+ # ---------------------------------------------------------------------
560
  @app.post("/chat")
561
  async def chat_with_ai(input_data: ChatInput):
562
  assist_followup: Optional[str] = None
563
  try:
564
  msg_norm = (input_data.user_message or "").lower().strip()
565
 
566
+ # yes/no handlers
567
  if msg_norm in ("yes", "y", "sure", "ok", "okay"):
568
  return {
569
  "bot_response": "Great! Tell me what you'd like to do next — check another ticket, create an incident, or describe your issue.",
 
582
  "debug": {"intent": "end_conversation"},
583
  }
584
 
585
+ # resolution ack (auto incident + mark Resolved)
586
  is_llm_resolved = _classify_resolution_llm(input_data.user_message)
587
  if _has_negation_resolved(msg_norm):
588
  is_llm_resolved = False
 
589
  if (not _has_negation_resolved(msg_norm)) and (_is_resolution_ack_heuristic(msg_norm) or is_llm_resolved):
590
  try:
591
  short_desc, long_desc = _build_tracking_descriptions(input_data.last_issue, input_data.user_message)
 
637
  "debug": {"intent": "resolved_ack", "exception": True},
638
  }
639
 
640
+ # incident intent
641
  if _is_incident_intent(msg_norm):
642
  return {
643
+ "bot_response": "Okay, let's create a ServiceNow incident.",
 
 
644
  "status": (input_data.prev_status or "PARTIAL"),
645
  "context_found": False,
646
  "ask_resolved": False,
 
652
  "debug": {"intent": "create_ticket"},
653
  }
654
 
655
+ # ticket status
656
  status_intent = _parse_ticket_status_intent(msg_norm)
657
  if status_intent:
658
  if status_intent.get("ask_number"):
 
676
  instance_url = os.getenv("SERVICENOW_INSTANCE_URL")
677
  if not instance_url:
678
  raise HTTPException(status_code=500, detail="SERVICENOW_INSTANCE_URL missing")
 
679
  headers = {"Authorization": f"Bearer {token}", "Accept": "application/json"}
680
  number = status_intent.get("number")
681
  url = f"{instance_url}/api/now/table/incident?number={number}"
 
683
  data = response.json()
684
  lst = data.get("result", [])
685
  result = (lst or [{}])[0] if response.status_code == 200 else {}
 
686
  state_code = builtins.str(result.get("state", "unknown"))
687
  state_label = STATE_MAP.get(state_code, state_code)
688
  short = result.get("short_description", "")
689
  num = result.get("number", number or "unknown")
 
690
  return {
691
  "bot_response": (
692
  f"**Ticket:** {num}\n"
 
706
  except Exception as e:
707
  raise HTTPException(status_code=500, detail=safe_str(e))
708
 
709
+ # ------------------ Hybrid KB search ------------------
 
 
710
  kb_results = hybrid_search_knowledge_base(input_data.user_message, top_k=10, alpha=0.6, beta=0.4)
711
  documents = kb_results.get("documents", [])
712
  metadatas = kb_results.get("metadatas", [])
 
730
 
731
  selected = items[:max(1, 2)]
732
  context_raw = "\n\n---\n\n".join([s["text"] for s in selected]) if selected else ""
 
733
  filtered_text, filt_info = _filter_context_for_query(context_raw, input_data.user_message)
734
  context = filtered_text
735
  context_found = bool(context.strip())
736
 
737
+ best_distance = (min([d for d in distances if d is not None], default=None) if distances else None)
738
+ best_combined = (max([c for c in combined if c is not None], default=None) if combined else None)
 
 
 
 
739
 
740
  detected_intent = kb_results.get("user_intent", "neutral")
741
  best_doc = kb_results.get("best_doc")
742
  top_meta = (metadatas or [{}])[0] if metadatas else {}
743
  msg_low = (input_data.user_message or "").lower()
 
744
  GENERIC_ERROR_TERMS = ("error", "issue", "problem", "not working", "failed", "failure")
745
  generic_error_signal = any(t in msg_low for t in GENERIC_ERROR_TERMS)
746
 
747
+ # intent nudge for prereqs
748
+ PREREQ_TERMS = ("pre req", "pre-requisite", "pre-requisites", "prerequisite",
749
+ "prerequisites", "pre requirement", "pre-requirements", "requirements")
 
 
750
  if detected_intent == "neutral" and any(t in msg_low for t in PREREQ_TERMS):
751
  detected_intent = "prereqs"
752
 
753
+ # permission queries force 'errors'
754
  PERM_QUERY_TERMS = [
755
  "permission", "permissions", "access", "access right", "authorization", "authorisation",
756
  "role", "role access", "security", "security profile", "privilege",
 
760
  if is_perm_query:
761
  detected_intent = "errors"
762
 
763
+ # heading-aware prereq nudge
764
  sec_title = ((top_meta or {}).get("section") or "").strip().lower()
765
+ PREREQ_HEADINGS = ("pre-requisites", "prerequisites", "pre requisites", "pre-requirements", "requirements")
 
 
 
766
  if detected_intent == "neutral" and any(h in sec_title for h in PREREQ_HEADINGS):
767
  detected_intent = "prereqs"
768
 
769
+ # gating
 
 
 
 
 
 
 
770
  def _contains_any(s: str, keywords: tuple) -> bool:
771
  low = (s or "").lower()
772
  return any(k in low for k in keywords)
 
784
  "error", "issue", "fail", "failed", "not working", "locked", "mismatch",
785
  "access", "permission", "status"
786
  )
 
787
  matched_count = int(filt_info.get("matched_count") or 0)
788
  filter_mode = (filt_info.get("mode") or "").lower()
789
  has_any_action_or_error = _contains_any(msg_low, ACTION_OR_ERROR_TERMS)
790
  mentions_domain = _contains_any(msg_low, DOMAIN_TERMS)
 
791
  short_query = len((input_data.user_message or "").split()) <= 4
792
  gate_combined_ok = 0.60 if short_query else 0.55
793
  combined_ok = (best_combined is not None and best_combined >= gate_combined_ok)
794
  weak_domain_only = (mentions_domain and not has_any_action_or_error)
795
  low_context_hit = (matched_count < 2 and filter_mode in ("concise", "exact"))
796
+ strong_steps_bypass = True
 
797
  strong_error_signal = len(_detect_error_families(msg_low)) > 0
798
 
799
  if (weak_domain_only or (low_context_hit and not combined_ok)) \
 
822
  },
823
  }
824
 
825
+ # ---------- Build SOP context ----------
826
  escalation_line: Optional[str] = None
827
  full_errors: Optional[str] = None
828
  next_step_applied = False
 
830
  context_preformatted = False
831
 
832
  if best_doc and detected_intent == "steps":
833
+ # prefer exact section of the top hit; fallback to all steps
834
  sec = (top_meta or {}).get("section")
835
  if sec:
836
+ full_steps = get_section_text(best_doc, sec)
 
837
  else:
838
+ full_steps = get_best_steps_section_text(best_doc)
839
+
 
840
  if full_steps:
841
  numbered_full = _ensure_numbering(full_steps)
842
+
843
+ # action filtering (create/update/delete) – only when user clearly asks
844
  raw_actions = set((kb_results.get("actions") or []))
845
+ msg_low2 = (input_data.user_message or "").lower()
846
+ if not raw_actions and ("creation" in msg_low2 or "create" in msg_low2 or "set up" in msg_low2 or "setup" in msg_low2):
 
847
  raw_actions = {"create"}
848
+ elif not raw_actions and ("update" in msg_low2 or "modify" in msg_low2 or "edit" in msg_low2 or "change" in msg_low2):
849
  raw_actions = {"update"}
850
+ elif not raw_actions and ("delete" in msg_low2 or "remove" in msg_low2 or "cancel" in msg_low2 or "void" in msg_low2):
851
  raw_actions = {"delete"}
852
+
853
  wanted, exclude = set(), set()
854
  if "create" in raw_actions and not ({"update", "delete"} & raw_actions):
855
  wanted, exclude = {"create"}, {"update", "delete"}
 
857
  wanted, exclude = {"update"}, {"create", "delete"}
858
  elif "delete" in raw_actions and not ({"create", "update"} & raw_actions):
859
  wanted, exclude = {"delete"}, {"create", "update"}
860
+
861
  if wanted or exclude:
862
+ numbered_full = _filter_numbered_steps_by_actions(numbered_full, wanted=wanted, exclude=exclude)
863
+
864
+ # --- NEW: keyword-free anchor-based next-step resolver ---
865
+ next_only = _anchor_next_steps(input_data.user_message, numbered_full, max_next=6)
866
+
 
 
 
 
 
 
 
867
  if next_only is not None:
868
  if len(next_only) == 0:
869
  context = "You are at the final step of this SOP. No further steps."
 
872
  context_preformatted = True
873
  else:
874
  context = _format_steps_as_numbered(next_only)
875
+ context = _dedupe_lines(context)
876
  next_step_applied = True
877
  next_step_info = {"count": len(next_only)}
878
  context_preformatted = True
879
  else:
880
+ context = numbered_full
881
+ context_preformatted = True
882
 
883
  # clear filter info for debug clarity
884
  filt_info = {'mode': None, 'matched_count': None, 'all_sentences': None}
 
901
  if re.match(r"^\s*[\-\*\u2022]\s*", ln) or (":" in ln)
902
  ]
903
  context = "\n".join(error_bullets[:8]).strip()
904
+ assist_followup = (
905
+ "Please tell me which error above matches your screen (paste the exact text), "
906
+ "or share a screenshot. I can guide you further or raise a ServiceNow ticket."
907
+ )
908
  escalation_line = _extract_escalation_line(full_errors)
909
 
910
  elif best_doc and detected_intent == "prereqs":
 
912
  if full_prereqs:
913
  context = full_prereqs.strip()
914
  context_found = True
 
915
  else:
916
+ # neutral or other intents: keep filtered context (already set as 'context')
917
+ pass
918
 
919
+ # language hint & paraphrase (errors only)
920
  language_hint = _detect_language_hint(input_data.user_message)
921
  lang_line = f"Respond in {language_hint}." if language_hint else "Respond in a clear, polite tone."
922
  use_gemini = (detected_intent == "errors")
 
923
  enhanced_prompt = f"""You are a helpful support assistant. Rewrite the provided context ONLY into clear, user-friendly guidance.
924
  - Do not add any information that is not present in the context.
925
  - If the content is an error/access/permission note, paraphrase it into a helpful sentence users can understand.
 
930
  {input_data.user_message}
931
  ### Output
932
  Return ONLY the rewritten guidance."""
 
933
  headers = {"Content-Type": "application/json"}
934
  payload = {"contents": [{"parts": [{"text": enhanced_prompt}]}]}
935
  bot_text = ""
 
950
  except Exception:
951
  bot_text, http_code = "", 0
952
 
953
+ # deterministic local formatting
954
  if detected_intent == "steps":
955
  if context_preformatted:
956
  bot_text = context
 
964
  else:
965
  bot_text = context
966
 
967
+ # explicit escalation add in steps if user asked
968
  needs_escalation = (" escalate" in msg_norm) or ("escalation" in msg_norm)
969
  if needs_escalation and best_doc:
970
  esc_text = get_escalation_text(best_doc)
 
974
  if line:
975
  bot_text = (bot_text or "").rstrip() + "\n\n" + line
976
 
977
+ # non-empty guarantee
978
  if not (bot_text or "").strip():
979
  if context.strip():
980
  bot_text = context.strip()
 
987
  short_query = len((input_data.user_message or "").split()) <= 4
988
  gate_combined_ok = 0.60 if short_query else 0.55
989
  status = "OK" if (best_combined is not None and best_combined >= gate_combined_ok) else "PARTIAL"
 
990
  lower = (bot_text or "").lower()
991
  if ("partial" in lower) or ("may be partial" in lower) or ("closest" in lower) or ("may not fully" in lower):
992
  status = "PARTIAL"
 
1021
  except Exception as e:
1022
  raise HTTPException(status_code=500, detail=safe_str(e))
1023
 
1024
+ # ---------------------------------------------------------------------
 
1025
  # Ticket description generation
1026
+ # ---------------------------------------------------------------------
1027
  @app.post("/generate_ticket_desc")
1028
  async def generate_ticket_desc_ep(input_data: TicketDescInput):
1029
  try:
 
1043
  data = resp.json()
1044
  except Exception:
1045
  return {"ShortDescription": "", "DetailedDescription": "", "error": "Gemini returned non-JSON"}
 
1046
  try:
1047
  text = data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "").strip()
1048
  except Exception:
1049
  return {"ShortDescription": "", "DetailedDescription": "", "error": "Gemini parsing failed"}
 
1050
  if text.startswith("```"):
1051
  lines = [ln for ln in text.splitlines() if not ln.strip().startswith("```")]
1052
  text = "\n".join(lines).strip()
 
1053
  try:
1054
  ticket_json = json.loads(text)
1055
  return {
 
1058
  }
1059
  except Exception:
1060
  return {"ShortDescription": "", "DetailedDescription": "", "error": "Invalid JSON returned"}
 
1061
  except Exception as e:
1062
  raise HTTPException(status_code=500, detail=safe_str(e))
1063
 
1064
+ # ---------------------------------------------------------------------
 
1065
  # Incident status
1066
+ # ---------------------------------------------------------------------
1067
  @app.post("/incident_status")
1068
  async def incident_status(input_data: TicketStatusInput):
1069
  try:
 
1071
  instance_url = os.getenv("SERVICENOW_INSTANCE_URL")
1072
  if not instance_url:
1073
  raise HTTPException(status_code=500, detail="SERVICENOW_INSTANCE_URL missing")
 
1074
  headers = {"Authorization": f"Bearer {token}", "Accept": "application/json"}
 
1075
  if input_data.sys_id:
1076
  url = f"{instance_url}/api/now/table/incident/{input_data.sys_id}"
1077
  response = requests.get(url, headers=headers, verify=VERIFY_SSL, timeout=25)
 
1085
  result = (lst or [{}])[0] if response.status_code == 200 else {}
1086
  else:
1087
  raise HTTPException(status_code=400, detail="Provide IncidentID (number) or sys_id")
 
1088
  state_code = builtins.str(result.get("state", "unknown"))
1089
  state_label = STATE_MAP.get(state_code, state_code)
1090
  short = result.get("short_description", "")
1091
  number = result.get("number", input_data.number or "unknown")
 
1092
  return {
1093
  "bot_response": (
1094
  f"**Ticket:** {number} \n"
 
1100
  "persist": True,
1101
  "debug": "Incident status fetched",
1102
  }
 
1103
  except Exception as e:
1104
  raise HTTPException(status_code=500, detail=safe_str(e))
1105
 
1106
+ # ---------------------------------------------------------------------
 
1107
  # Incident creation
1108
+ # ---------------------------------------------------------------------
1109
  def _classify_resolution_llm(user_message: str) -> bool:
1110
  if not GEMINI_API_KEY:
1111
  return False
 
1127
  except Exception:
1128
  return False
1129
 
 
1130
  def _set_incident_resolved(sys_id: str) -> bool:
1131
  try:
1132
  token = get_valid_token()
 
1134
  if not instance_url:
1135
  print("[SN PATCH resolve] missing SERVICENOW_INSTANCE_URL")
1136
  return False
 
1137
  headers = {
1138
  "Authorization": f"Bearer {token}",
1139
  "Accept": "application/json",
1140
  "Content-Type": "application/json",
1141
  }
1142
  url = f"{instance_url}/api/now/table/incident/{sys_id}"
 
1143
  close_code_val = os.getenv("SERVICENOW_CLOSE_CODE", "Solution provided")
1144
  close_notes_val = os.getenv("SERVICENOW_RESOLUTION_NOTES", "Issue resolved, user confirmed")
1145
  caller_sysid = os.getenv("SERVICENOW_CALLER_SYSID")
1146
  resolved_by_sysid = os.getenv("SERVICENOW_RESOLVED_BY_SYSID")
1147
  assign_group = os.getenv("SERVICENOW_ASSIGNMENT_GROUP_SYSID")
1148
  require_progress = os.getenv("SERVICENOW_REQUIRE_IN_PROGRESS_FIRST", "false").lower() in ("1", "true", "yes")
 
1149
  if require_progress:
1150
  try:
1151
  resp1 = requests.patch(url, headers=headers, json={"state": "2"}, verify=VERIFY_SSL, timeout=25)
 
1190
  notes_field = os.getenv("SERVICENOW_RESOLUTION_NOTES_FIELD", "close_notes")
1191
  payload_C = clean({
1192
  "state": "6",
1193
+ code_field: close_notes_val, # adjust if custom fields are mapped differently
1194
  notes_field: close_notes_val,
1195
  "caller_id": caller_sysid,
1196
  "resolved_at": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
 
1202
  if respC.status_code in (200, 204):
1203
  return True
1204
  print(f"[SN PATCH resolve C] status={respC.status_code} body={respC.text[:500]}")
 
1205
  return False
1206
  except Exception as e:
1207
  print(f"[SN PATCH resolve] exception={safe_str(e)}")
1208
  return False
1209
 
 
1210
  @app.post("/incident")
1211
  async def raise_incident(input_data: IncidentInput):
1212
  try: