srilakshu012456 committed on
Commit
7815846
·
verified ·
1 Parent(s): 08c1dac

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +89 -83
main.py CHANGED
@@ -16,8 +16,9 @@ from services.kb_creation import (
16
  collection,
17
  ingest_documents,
18
  hybrid_search_knowledge_base,
19
- get_section_text, # NEW
20
- get_best_steps_section_text, # NEW
 
21
  )
22
 
23
  from services.login import router as login_router
@@ -92,6 +93,7 @@ GEMINI_URL = (
92
  f"gemini-2.5-flash-lite:generateContent?key={GEMINI_API_KEY}"
93
  )
94
 
 
95
  def extract_kb_context(kb_results: Optional[Dict[str, Any]], top_chunks: int = 2) -> Dict[str, Any]:
96
  if not kb_results or not isinstance(kb_results, dict):
97
  return {"context": "", "sources": [], "top_hits": [], "context_found": False, "best_score": None, "best_combined": None}
@@ -162,6 +164,7 @@ def _build_clarifying_message() -> str:
162
  "Reply with these details and I’ll search again."
163
  )
164
 
 
165
  def _build_tracking_descriptions(issue_text: str, resolved_text: str) -> Tuple[str, str]:
166
  issue = (issue_text or "").strip()
167
  resolved = (resolved_text or "").strip()
@@ -292,7 +295,7 @@ def _filter_context_for_query(context: str, query: str) -> Tuple[str, Dict[str,
292
  kept = sentences[:MAX_SENTENCES_CONCISE]
293
  return "\n".join(kept).strip(), {'mode': 'concise', 'matched_count': 0, 'all_sentences': len(sentences)}
294
 
295
- # ---------- intent & action specific extractors ----------
296
  STEP_LINE_REGEX = re.compile(r"^\s*(?:\d+[\.\)]\s+|[•\-]\s+)", re.IGNORECASE)
297
  NAV_LINE_REGEX = re.compile(r"(navigate\s+to|>\s*)", re.IGNORECASE)
298
 
@@ -309,21 +312,6 @@ NON_PROC_PHRASES = [
309
  ]
310
  NON_PROC_ANY_REGEX = re.compile("|".join([re.escape(v) for v in NON_PROC_PHRASES]), re.IGNORECASE)
311
 
312
- ACTION_SYNS_FLAT = {
313
- "create": ["create", "creation", "add", "new", "generate"],
314
- "update": ["update", "modify", "change", "edit"],
315
- "delete": ["delete", "remove"],
316
- "navigate": ["navigate", "go to", "open"],
317
- }
318
-
319
- def _action_in_line(ln: str, target_actions: List[str]) -> bool:
320
- s = (ln or "").lower()
321
- for act in target_actions:
322
- for syn in ACTION_SYNS_FLAT.get(act, [act]):
323
- if syn in s:
324
- return True
325
- return False
326
-
327
  def _is_procedural_line(ln: str) -> bool:
328
  s = (ln or "").strip()
329
  if not s:
@@ -340,14 +328,11 @@ def _is_procedural_line(ln: str) -> bool:
340
  return True
341
  return False
342
 
343
- def _extract_steps_only(text: str, max_lines: Optional[int] = 12, target_actions: Optional[List[str]] = None) -> str:
344
  lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
345
  kept = []
346
  for ln in lines:
347
  if _is_procedural_line(ln):
348
- if target_actions:
349
- if not _action_in_line(ln, target_actions):
350
- continue
351
  kept.append(ln)
352
  if max_lines is not None and len(kept) >= max_lines:
353
  break
@@ -374,30 +359,39 @@ def _extract_errors_only(text: str, max_lines: int = 10) -> str:
374
  return "\n".join(kept).strip() if kept else (text or "").strip()
375
 
376
  def _format_steps_markdown(lines: List[str]) -> str:
377
- """
378
- Convert a list of step lines into a clean Markdown numbered list.
379
- Keeps original order, trims whitespace, skips empty lines.
380
- """
381
- items = []
382
  for i, ln in enumerate(lines, start=1):
383
  s = (ln or "").strip()
384
  if not s:
385
  continue
386
- # If the line already has leading "1. " or "• ", strip it so numbering is consistent
387
  s = re.sub(r"^\s*(?:\d+[\.\)]\s+|[•\-]\s+)", "", s).strip()
388
  items.append(f"{i}. {s}")
389
  return "\n".join(items).strip()
390
-
 
 
 
 
 
 
 
 
 
 
 
391
  @app.get("/")
392
  async def health_check():
393
  return {"status": "ok"}
394
 
 
395
  @app.post("/chat")
396
  async def chat_with_ai(input_data: ChatInput):
397
  try:
398
  msg_norm = (input_data.user_message or "").lower().strip()
399
 
400
- # --- Yes/No handlers ---
401
  if msg_norm in ("yes", "y", "sure", "ok", "okay"):
402
  return {
403
  "bot_response": ("Great! Tell me what you’d like to do next — check another ticket, create an incident, or describe your issue."),
@@ -416,7 +410,7 @@ async def chat_with_ai(input_data: ChatInput):
416
  "debug": {"intent": "end_conversation"},
417
  }
418
 
419
- # --- Resolution ack ---
420
  is_llm_resolved = _classify_resolution_llm(input_data.user_message)
421
  if _has_negation_resolved(msg_norm):
422
  is_llm_resolved = False
@@ -469,7 +463,7 @@ async def chat_with_ai(input_data: ChatInput):
469
  "debug": {"intent": "resolved_ack", "exception": True},
470
  }
471
 
472
- # --- Incident intent ---
473
  if _is_incident_intent(msg_norm):
474
  return {
475
  "bot_response": (
@@ -488,7 +482,7 @@ async def chat_with_ai(input_data: ChatInput):
488
  "debug": {"intent": "create_ticket"},
489
  }
490
 
491
- # --- Generic opener ---
492
  if _is_generic_issue(msg_norm):
493
  return {
494
  "bot_response": (
@@ -509,7 +503,7 @@ async def chat_with_ai(input_data: ChatInput):
509
  "debug": {"intent": "generic_issue"},
510
  }
511
 
512
- # --- Status intent ---
513
  status_intent = _parse_ticket_status_intent(msg_norm)
514
  if status_intent:
515
  if status_intent.get("ask_number"):
@@ -560,39 +554,58 @@ async def chat_with_ai(input_data: ChatInput):
560
  except Exception as e:
561
  raise HTTPException(status_code=500, detail=safe_str(e))
562
 
563
- # --- Hybrid KB search ---
564
  kb_results = hybrid_search_knowledge_base(input_data.user_message, top_k=10, alpha=0.6, beta=0.4)
565
  kb_ctx = extract_kb_context(kb_results, top_chunks=2)
566
  context_raw = kb_ctx.get("context", "") or ""
 
567
 
568
  filtered_text, filt_info = _filter_context_for_query(context_raw, input_data.user_message)
569
  context = filtered_text
570
  context_found = bool(kb_ctx.get("context_found", False)) and bool(context.strip())
571
  best_distance = kb_ctx.get("best_score")
572
  best_combined = kb_ctx.get("best_combined")
573
- detected_intent = kb_results.get("user_intent", "neutral")
574
- actions = kb_results.get("actions", [])
575
  best_doc = kb_results.get("best_doc")
576
  top_meta = (kb_results.get("metadatas") or [{}])[0] if (kb_results.get("metadatas") or []) else {}
577
 
578
- # --- FULL SECTION when strongly found & steps intent ---
579
- if detected_intent == "steps" and best_doc:
580
- # prefer full 'Process Steps' section from the best SOP
581
  full_steps = get_best_steps_section_text(best_doc)
582
- if not full_steps:
583
- # fallback: full text of the top section
584
- sec = (top_meta or {}).get("section")
585
- if sec:
586
- full_steps = get_section_text(best_doc, sec)
587
  if full_steps:
588
- # If confidence is high, return ALL procedural lines (no truncation)
589
- high_conf = (best_combined is not None and best_combined >= 0.75)
590
- context = _extract_steps_only(full_steps, max_lines=None if high_conf else 20, target_actions=actions)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
 
592
- # Intent-shaping (secondary; if not already handled above)
593
  q = (input_data.user_message or "").lower()
594
  if detected_intent == "steps" or any(k in q for k in ["steps", "procedure", "perform", "do", "process"]):
595
- context = _extract_steps_only(context, max_lines=None if (best_combined and best_combined >= 0.75) else 12, target_actions=actions)
596
  elif detected_intent == "errors" or any(k in q for k in ["error", "issue", "fail", "not working", "resolution", "fix"]):
597
  context = _extract_errors_only(context, max_lines=10)
598
  elif any(k in q for k in ["navigate", "navigation", "menu", "screen"]):
@@ -626,12 +639,11 @@ async def chat_with_ai(input_data: ChatInput):
626
  "debug": {"used_chunks": 0, "second_try": second_try, "best_distance": best_distance, "best_combined": best_combined},
627
  }
628
 
629
- # LLM rewrite (kept, but we still fallback cleanly)
630
  enhanced_prompt = (
631
  "From the provided context, output only the actionable steps/procedure relevant to the user's question. "
632
  "Use ONLY the provided context; do NOT add information that is not present. "
633
- + ("Return ONLY lines containing the requested action verbs. " if actions else "")
634
- + "Do NOT include document names, section titles, or 'Source:' lines.\n\n"
635
  f"### Context\n{context}\n\n"
636
  f"### Question\n{input_data.user_message}\n\n"
637
  "### Output\n"
@@ -658,33 +670,26 @@ async def chat_with_ai(input_data: ChatInput):
658
  if not bot_text.strip():
659
  bot_text = context
660
  bot_text = _strip_any_source_lines(bot_text).strip()
661
-
662
- # If the intent is steps, render lines as a numbered Markdown list
663
- if kb_results.get("user_intent", "neutral") == "steps":
664
- raw_lines = [ln.strip() for ln in bot_text.splitlines() if ln.strip()]
665
-
666
- # If everything is on a single line, split defensively on ". "
667
- if len(raw_lines) == 1:
668
- parts = [p.strip() for p in re.split(r"\.\s+(?=[A-Z0-9])", raw_lines[0]) if p.strip()]
669
- raw_lines = parts if len(parts) > 1 else raw_lines
670
-
671
- # 🔴 NEW: merge number-only lines with the next line
672
- merged: list[str] = []
673
- i = 0
674
- while i < len(raw_lines):
675
- curr = raw_lines[i]
676
- # A number-only line (e.g., "1", "2", "3")
677
- if re.fullmatch(r"\d+", curr) and (i + 1) < len(raw_lines):
678
- nxt = raw_lines[i + 1].strip()
679
- # Combine into one line: "1. <next line text>"
680
- merged.append(f"{curr}. {nxt}")
681
- i += 2 # skip the next line; already merged
682
- else:
683
- merged.append(curr)
684
- i += 1
685
-
686
- # Finally: normalize and render as Markdown numbered list
687
- bot_text = _format_steps_markdown(merged)
688
 
689
  status = "OK" if (
690
  (best_combined is not None and best_combined >= gate_combined_ok)
@@ -712,7 +717,7 @@ async def chat_with_ai(input_data: ChatInput):
712
  "filter_mode": filt_info.get("mode"),
713
  "matched_count": filt_info.get("matched_count"),
714
  "user_intent": detected_intent,
715
- "actions": actions,
716
  "best_doc": best_doc,
717
  },
718
  }
@@ -722,6 +727,7 @@ async def chat_with_ai(input_data: ChatInput):
722
  except Exception as e:
723
  raise HTTPException(status_code=500, detail=safe_str(e))
724
 
 
725
  def _set_incident_resolved(sys_id: str) -> bool:
726
  try:
727
  token = get_valid_token()
@@ -842,7 +848,7 @@ async def generate_ticket_desc_ep(input_data: TicketDescInput):
842
  )
843
  headers = {"Content-Type": "application/json"}
844
  payload = {"contents": [{"parts": [{"text": prompt}]}]}
845
- resp = requests.post(GEMINI_URL, headers=headers, json=payload, timeout=25, verify=GEMINI_GSL_VERIFY if 'GEMINI_GSL_VERIFY' in globals() else GEMINI_SSL_VERIFY)
846
  try:
847
  data = resp.json()
848
  except Exception:
@@ -904,7 +910,7 @@ async def incident_status(input_data: TicketStatusInput):
904
  except Exception as e:
905
  raise HTTPException(status_code=500, detail=safe_str(e))
906
 
907
- # ---- Admin endpoints (optional) ----
908
  @app.get("/kb/info")
909
  async def kb_info():
910
  from services.kb_creation import get_kb_runtime_info
 
16
  collection,
17
  ingest_documents,
18
  hybrid_search_knowledge_base,
19
+ detect_user_intent, # NEW semantic intent
20
+ get_section_text,
21
+ get_best_steps_section_text,
22
  )
23
 
24
  from services.login import router as login_router
 
93
  f"gemini-2.5-flash-lite:generateContent?key={GEMINI_API_KEY}"
94
  )
95
 
96
+ # ---------- Helpers: context merge + sanitation ----------
97
  def extract_kb_context(kb_results: Optional[Dict[str, Any]], top_chunks: int = 2) -> Dict[str, Any]:
98
  if not kb_results or not isinstance(kb_results, dict):
99
  return {"context": "", "sources": [], "top_hits": [], "context_found": False, "best_score": None, "best_combined": None}
 
164
  "Reply with these details and I’ll search again."
165
  )
166
 
167
+ # ---------- Intent helpers ----------
168
  def _build_tracking_descriptions(issue_text: str, resolved_text: str) -> Tuple[str, str]:
169
  issue = (issue_text or "").strip()
170
  resolved = (resolved_text or "").strip()
 
295
  kept = sentences[:MAX_SENTENCES_CONCISE]
296
  return "\n".join(kept).strip(), {'mode': 'concise', 'matched_count': 0, 'all_sentences': len(sentences)}
297
 
298
+ # ---------- intent & formatting extractors ----------
299
  STEP_LINE_REGEX = re.compile(r"^\s*(?:\d+[\.\)]\s+|[•\-]\s+)", re.IGNORECASE)
300
  NAV_LINE_REGEX = re.compile(r"(navigate\s+to|>\s*)", re.IGNORECASE)
301
 
 
312
  ]
313
  NON_PROC_ANY_REGEX = re.compile("|".join([re.escape(v) for v in NON_PROC_PHRASES]), re.IGNORECASE)
314
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
  def _is_procedural_line(ln: str) -> bool:
316
  s = (ln or "").strip()
317
  if not s:
 
328
  return True
329
  return False
330
 
331
+ def _extract_steps_only(text: str, max_lines: Optional[int] = 12) -> str:
332
  lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
333
  kept = []
334
  for ln in lines:
335
  if _is_procedural_line(ln):
 
 
 
336
  kept.append(ln)
337
  if max_lines is not None and len(kept) >= max_lines:
338
  break
 
359
  return "\n".join(kept).strip() if kept else (text or "").strip()
360
 
361
  def _format_steps_markdown(lines: List[str]) -> str:
362
+ """Convert step lines to a clean Markdown numbered list."""
363
+ items: List[str] = []
 
 
 
364
  for i, ln in enumerate(lines, start=1):
365
  s = (ln or "").strip()
366
  if not s:
367
  continue
368
+ # Strip existing numbering/bullets to avoid double-numbering
369
  s = re.sub(r"^\s*(?:\d+[\.\)]\s+|[•\-]\s+)", "", s).strip()
370
  items.append(f"{i}. {s}")
371
  return "\n".join(items).strip()
372
+
373
+ def _format_bullets_markdown(lines: List[str]) -> str:
374
+ items: List[str] = []
375
+ for ln in lines:
376
+ s = (ln or "").strip()
377
+ if not s:
378
+ continue
379
+ s = re.sub(r"^\s*(?:\d+[\.\)]\s+|[•\-]\s+)", "", s).strip()
380
+ items.append(f"- {s}")
381
+ return "\n".join(items).strip()
382
+
383
+ # ---------- Health ----------
384
@app.get("/")
async def health_check():
    """Liveness probe: always reports the service as up."""
    payload = {"status": "ok"}
    return payload
387
 
388
+ # ---------- Chat endpoint ----------
389
  @app.post("/chat")
390
  async def chat_with_ai(input_data: ChatInput):
391
  try:
392
  msg_norm = (input_data.user_message or "").lower().strip()
393
 
394
+ # Yes/No handlers
395
  if msg_norm in ("yes", "y", "sure", "ok", "okay"):
396
  return {
397
  "bot_response": ("Great! Tell me what you’d like to do next — check another ticket, create an incident, or describe your issue."),
 
410
  "debug": {"intent": "end_conversation"},
411
  }
412
 
413
+ # Resolution acknowledgement
414
  is_llm_resolved = _classify_resolution_llm(input_data.user_message)
415
  if _has_negation_resolved(msg_norm):
416
  is_llm_resolved = False
 
463
  "debug": {"intent": "resolved_ack", "exception": True},
464
  }
465
 
466
+ # Incident intent
467
  if _is_incident_intent(msg_norm):
468
  return {
469
  "bot_response": (
 
482
  "debug": {"intent": "create_ticket"},
483
  }
484
 
485
+ # Generic opener → ask for details first
486
  if _is_generic_issue(msg_norm):
487
  return {
488
  "bot_response": (
 
503
  "debug": {"intent": "generic_issue"},
504
  }
505
 
506
+ # Ticket status
507
  status_intent = _parse_ticket_status_intent(msg_norm)
508
  if status_intent:
509
  if status_intent.get("ask_number"):
 
554
  except Exception as e:
555
  raise HTTPException(status_code=500, detail=safe_str(e))
556
 
557
+ # ---- Hybrid KB search (semantic intent aware) ----
558
  kb_results = hybrid_search_knowledge_base(input_data.user_message, top_k=10, alpha=0.6, beta=0.4)
559
  kb_ctx = extract_kb_context(kb_results, top_chunks=2)
560
  context_raw = kb_ctx.get("context", "") or ""
561
+ detected_intent, detected_intent_conf = detect_user_intent(input_data.user_message)
562
 
563
  filtered_text, filt_info = _filter_context_for_query(context_raw, input_data.user_message)
564
  context = filtered_text
565
  context_found = bool(kb_ctx.get("context_found", False)) and bool(context.strip())
566
  best_distance = kb_ctx.get("best_score")
567
  best_combined = kb_ctx.get("best_combined")
 
 
568
  best_doc = kb_results.get("best_doc")
569
  top_meta = (kb_results.get("metadatas") or [{}])[0] if (kb_results.get("metadatas") or []) else {}
570
 
571
+ # ---- FULL SECTION when strongly found & steps intent ----
572
+ high_conf = (best_combined is not None and best_combined >= 0.70) and (detected_intent_conf >= 0.55)
573
+ if detected_intent == "steps" and best_doc and high_conf:
574
  full_steps = get_best_steps_section_text(best_doc)
575
+ if not full_steps and top_meta.get("section"):
576
+ full_steps = get_section_text(best_doc, top_meta.get("section"))
 
 
 
577
  if full_steps:
578
+ # show all procedural lines (no truncation)
579
+ context = _extract_steps_only(full_steps, max_lines=None)
580
+
581
+ # ---- Permission/Errors/Prereqs → tips + escalation if available ----
582
+ if detected_intent in ("permission", "errors", "prereqs") and best_doc:
583
+ errors = get_section_text(best_doc, "Common Errors & Resolution")
584
+ escalation = get_section_text(best_doc, "Escalation Path")
585
+ resp_lines: List[str] = []
586
+ if errors:
587
+ resp_lines.append("**Resolution Tips:**")
588
+ resp_lines.extend([f"- {ln.strip()}" for ln in errors.splitlines() if ln.strip()])
589
+ if escalation:
590
+ resp_lines.append("\n**Escalation Path:**")
591
+ resp_lines.append(escalation.strip())
592
+ if resp_lines:
593
+ return {
594
+ "bot_response": "\n".join(resp_lines),
595
+ "status": "PARTIAL",
596
+ "context_found": True,
597
+ "ask_resolved": False,
598
+ "suggest_incident": True,
599
+ "followup": "Shall I create a ticket for WMS Support?",
600
+ "top_hits": [],
601
+ "sources": [],
602
+ "debug": {"intent": detected_intent, "best_doc": best_doc},
603
+ }
604
 
605
+ # Intent-shaped extraction (secondary)
606
  q = (input_data.user_message or "").lower()
607
  if detected_intent == "steps" or any(k in q for k in ["steps", "procedure", "perform", "do", "process"]):
608
+ context = _extract_steps_only(context, max_lines=None if (best_combined and best_combined >= 0.70) else 12)
609
  elif detected_intent == "errors" or any(k in q for k in ["error", "issue", "fail", "not working", "resolution", "fix"]):
610
  context = _extract_errors_only(context, max_lines=10)
611
  elif any(k in q for k in ["navigate", "navigation", "menu", "screen"]):
 
639
  "debug": {"used_chunks": 0, "second_try": second_try, "best_distance": best_distance, "best_combined": best_combined},
640
  }
641
 
642
+ # LLM rewrite (kept) will be formatted if empty/fallback
643
  enhanced_prompt = (
644
  "From the provided context, output only the actionable steps/procedure relevant to the user's question. "
645
  "Use ONLY the provided context; do NOT add information that is not present. "
646
+ "Do NOT include document names, section titles, or 'Source:' lines.\n\n"
 
647
  f"### Context\n{context}\n\n"
648
  f"### Question\n{input_data.user_message}\n\n"
649
  "### Output\n"
 
670
  if not bot_text.strip():
671
  bot_text = context
672
  bot_text = _strip_any_source_lines(bot_text).strip()
673
+
674
+ # --- Steps Markdown formatting (merge numeric-only lines) ---
675
+ if detected_intent == "steps":
676
+ raw_lines = [ln.strip() for ln in bot_text.splitlines() if ln.strip()]
677
+ if len(raw_lines) == 1:
678
+ parts = [p.strip() for p in re.split(r"\.\s+(?=[A-Z0-9])", raw_lines[0]) if p.strip()]
679
+ raw_lines = parts if len(parts) > 1 else raw_lines
680
+
681
+ merged: List[str] = []
682
+ i = 0
683
+ while i < len(raw_lines):
684
+ curr = raw_lines[i]
685
+ if re.fullmatch(r"\d+[\.\)]?", curr) and (i + 1) < len(raw_lines):
686
+ num = re.match(r"(\d+)", curr).group(1)
687
+ merged.append(f"{num}. {raw_lines[i+1].strip()}")
688
+ i += 2
689
+ else:
690
+ merged.append(curr)
691
+ i += 1
692
+ bot_text = _format_steps_markdown(merged)
 
 
 
 
 
 
 
693
 
694
  status = "OK" if (
695
  (best_combined is not None and best_combined >= gate_combined_ok)
 
717
  "filter_mode": filt_info.get("mode"),
718
  "matched_count": filt_info.get("matched_count"),
719
  "user_intent": detected_intent,
720
+ "user_intent_conf": detected_intent_conf,
721
  "best_doc": best_doc,
722
  },
723
  }
 
727
  except Exception as e:
728
  raise HTTPException(status_code=500, detail=safe_str(e))
729
 
730
+ # ---------- Incident endpoints ----------
731
  def _set_incident_resolved(sys_id: str) -> bool:
732
  try:
733
  token = get_valid_token()
 
848
  )
849
  headers = {"Content-Type": "application/json"}
850
  payload = {"contents": [{"parts": [{"text": prompt}]}]}
851
+ resp = requests.post(GEMINI_URL, headers=headers, json=payload, timeout=25, verify=GEMINI_SSL_VERIFY)
852
  try:
853
  data = resp.json()
854
  except Exception:
 
910
  except Exception as e:
911
  raise HTTPException(status_code=500, detail=safe_str(e))
912
 
913
+ # ---- Admin endpoints ----
914
  @app.get("/kb/info")
915
  async def kb_info():
916
  from services.kb_creation import get_kb_runtime_info