srilakshu012456 committed on
Commit
aa65b5b
·
verified ·
1 Parent(s): 60c3916

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +61 -39
main.py CHANGED
@@ -10,12 +10,12 @@ from fastapi.middleware.cors import CORSMiddleware
10
  from pydantic import BaseModel
11
  from dotenv import load_dotenv
12
  from datetime import datetime
13
- #new
14
  # KB services (Chroma + sentence-transformers + BM25 hybrid)
15
  from services.kb_creation import (
16
  collection,
17
  ingest_documents,
18
- hybrid_search_knowledge_base, # new
19
  )
20
 
21
  # Optional routers/utilities you already have
@@ -68,22 +68,18 @@ class ChatInput(BaseModel):
68
  prev_status: Optional[str] = None # "NO_KB_MATCH" | "PARTIAL" | "OK" | None
69
  last_issue: Optional[str] = None
70
 
71
-
72
  class IncidentInput(BaseModel):
73
  short_description: str
74
  description: str
75
  mark_resolved: Optional[bool] = False
76
 
77
-
78
  class TicketDescInput(BaseModel):
79
  issue: str
80
 
81
-
82
  class TicketStatusInput(BaseModel):
83
  sys_id: Optional[str] = None
84
  number: Optional[str] = None # IncidentID (incident number)
85
 
86
-
87
  # ✅ Human‑readable mapping for ServiceNow incident state codes
88
  STATE_MAP = {
89
  "1": "New",
@@ -169,7 +165,6 @@ def extract_kb_context(kb_results: Optional[Dict[str, Any]], top_chunks: int = 2
169
  "best_combined": best_combined,
170
  }
171
 
172
-
173
  def _strip_any_source_lines(text: str) -> str:
174
  lines = text.splitlines()
175
  kept = []
@@ -179,7 +174,6 @@ def _strip_any_source_lines(text: str) -> str:
179
  kept.append(ln)
180
  return "\n".join(kept).strip()
181
 
182
-
183
  def _build_clarifying_message() -> str:
184
  return (
185
  "I couldn’t find matching content in the KB yet. To help me narrow it down, please share:\n\n"
@@ -193,10 +187,6 @@ def _build_clarifying_message() -> str:
193
 
194
  # ---------- Intent helpers ----------
195
  def _build_tracking_descriptions(issue_text: str, resolved_text: str) -> tuple[str, str]:
196
- """
197
- Short: first 100 chars of the ORIGINAL issue text (preferred).
198
- Long: clear sentence that includes both original issue and resolved ack.
199
- """
200
  issue = (issue_text or "").strip()
201
  resolved = (resolved_text or "").strip()
202
  short_desc = issue[:100] if issue else (resolved[:100] or "Issue resolved (user confirmation)")
@@ -207,7 +197,6 @@ def _build_tracking_descriptions(issue_text: str, resolved_text: str) -> tuple[s
207
  ).strip()
208
  return short_desc, long_desc
209
 
210
-
211
  def _is_incident_intent(msg_norm: str) -> bool:
212
  intent_phrases = [
213
  "create ticket", "create a ticket", "raise ticket", "raise a ticket", "open ticket", "open a ticket",
@@ -217,7 +206,6 @@ def _is_incident_intent(msg_norm: str) -> bool:
217
  ]
218
  return any(p in msg_norm for p in intent_phrases)
219
 
220
-
221
  def _is_feedback_message(msg_norm: str) -> bool:
222
  feedback_phrases = [
223
  "issue not resolved", "not resolved", "still not working",
@@ -226,7 +214,6 @@ def _is_feedback_message(msg_norm: str) -> bool:
226
  ]
227
  return any(p in msg_norm for p in feedback_phrases)
228
 
229
-
230
  def _parse_ticket_status_intent(msg_norm: str) -> Dict[str, Optional[str]]:
231
  status_keywords = [
232
  "status", "ticket status", "incident status",
@@ -246,7 +233,6 @@ def _parse_ticket_status_intent(msg_norm: str) -> Dict[str, Optional[str]]:
246
  return {"number": val.upper() if val.lower().startswith("inc") else val}
247
  return {"number": None, "ask_number": True}
248
 
249
-
250
  def _is_resolution_ack_heuristic(msg_norm: str) -> bool:
251
  phrases = [
252
  "it is resolved", "resolved", "issue resolved", "problem resolved",
@@ -255,7 +241,6 @@ def _is_resolution_ack_heuristic(msg_norm: str) -> bool:
255
  ]
256
  return any(p in msg_norm for p in phrases)
257
 
258
-
259
  def _has_negation_resolved(msg_norm: str) -> bool:
260
  neg_phrases = [
261
  "not resolved", "issue not resolved", "still not working", "not working",
@@ -263,7 +248,6 @@ def _has_negation_resolved(msg_norm: str) -> bool:
263
  ]
264
  return any(p in msg_norm for p in neg_phrases)
265
 
266
-
267
  def _classify_resolution_llm(user_message: str) -> bool:
268
  if not GEMINI_API_KEY:
269
  return False
@@ -289,12 +273,7 @@ def _classify_resolution_llm(user_message: str) -> bool:
289
  except Exception:
290
  return False
291
 
292
-
293
  def _is_generic_issue(msg_norm: str) -> bool:
294
- """
295
- Returns True for very generic/open-ended issue statements that
296
- shouldn’t trigger a KB search yet.
297
- """
298
  generic_phrases = [
299
  "issue", "have an issue", "having an issue", "got an issue",
300
  "problem", "have a problem", "help", "need help", "support",
@@ -308,20 +287,17 @@ STRICT_OVERLAP = 3 # ≥3 shared terms → treat as exact match
308
  MAX_SENTENCES_STRICT = 4 # limit for exact-mode
309
  MAX_SENTENCES_CONCISE = 3 # limit for partial-mode
310
 
311
-
312
  def _normalize_for_match(text: str) -> str:
313
  t = (text or "").lower()
314
  t = re.sub(r"[^\w\s]", " ", t) # remove punctuation
315
  t = re.sub(r"\s+", " ", t).strip() # collapse spaces
316
  return t
317
 
318
-
319
  def _split_sentences(ctx: str) -> list[str]:
320
  # crude sentence split: punctuation/newlines/bullets/dashes
321
  raw_sents = re.split(r"(?<=[.!?])\s+|\n+|•\s*|-\s*", ctx or "")
322
  return [s.strip() for s in raw_sents if s and len(s.strip()) > 2]
323
 
324
-
325
  def _filter_context_for_query(context: str, query: str) -> tuple[str, dict]:
326
  """
327
  Returns (filtered_text, info) where filtered_text is:
@@ -376,6 +352,44 @@ def _filter_context_for_query(context: str, query: str) -> tuple[str, dict]:
376
  'all_sentences': len(sentences)
377
  }
378
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
379
  # ---------- Health ----------
380
  @app.get("/")
381
  async def health_check():
@@ -593,12 +607,23 @@ async def chat_with_ai(input_data: ChatInput):
593
  kb_ctx = extract_kb_context(kb_results, top_chunks=2)
594
  context_raw = kb_ctx.get("context", "") or ""
595
 
596
- # NEW: filter to exact/concise and always preserve original order of matched sentences
597
  filtered_text, filt_info = _filter_context_for_query(context_raw, input_data.user_message)
598
  context = filtered_text
599
  context_found = bool(kb_ctx.get("context_found", False)) and bool(context.strip())
600
  best_distance = kb_ctx.get("best_score") # lower = better
601
  best_combined = kb_ctx.get("best_combined") # higher = better
 
 
 
 
 
 
 
 
 
 
 
602
 
603
  # Dynamic gating
604
  short_query = len((input_data.user_message or "").split()) <= 4
@@ -631,22 +656,21 @@ async def chat_with_ai(input_data: ChatInput):
631
  # We have KB context → LLM rewrite (KB‑only, no Source lines)
632
  threshold_ok = gate_combined_ok
633
  mode_note = (
634
- "Return ONLY the matched lines from the context in the same order. "
635
- "Do not add any other lines or generic instructions."
636
  if filt_info.get("mode") == "exact" else
637
- "Return a short, meaningful snippet (2-3 sentences) strictly based on the context."
638
  )
639
 
640
  enhanced_prompt = (
641
- "Rewrite the following knowledge base context into clear, actionable steps for the user's question. "
642
- "Use ONLY the provided context; do NOT add information that is not present in it. "
643
  f"{mode_note} "
644
- "Do NOT include any document names, section titles, or 'Source:' lines in your output.\n\n"
645
  f"### Context\n{context}\n\n"
646
  f"### Question\n{input_data.user_message}\n\n"
647
  "### Output\n"
648
- "- Provide concise bullets or sentences.\n"
649
- "- If context is insufficient for an exact answer, add: 'This may be partial based on available KB.'\n"
650
  )
651
 
652
  headers = {"Content-Type": "application/json"}
@@ -670,7 +694,7 @@ async def chat_with_ai(input_data: ChatInput):
670
  bot_text = ""
671
 
672
  if not bot_text.strip():
673
- # Fallback to the filtered context (never the full SOP chunk)
674
  bot_text = context
675
  bot_text = _strip_any_source_lines(bot_text).strip()
676
 
@@ -703,6 +727,7 @@ async def chat_with_ai(input_data: ChatInput):
703
  "http_status": getattr(resp, "status_code", 0),
704
  "filter_mode": filt_info.get("mode"),
705
  "matched_count": filt_info.get("matched_count"),
 
706
  },
707
  }
708
 
@@ -802,7 +827,6 @@ def _set_incident_resolved(sys_id: str) -> bool:
802
  print(f"[SN PATCH resolve] exception={str(e)}")
803
  return False
804
 
805
-
806
  @app.post("/incident")
807
  async def raise_incident(input_data: IncidentInput):
808
  try:
@@ -829,7 +853,6 @@ async def raise_incident(input_data: IncidentInput):
829
  except Exception as e:
830
  raise HTTPException(status_code=500, detail=str(e))
831
 
832
-
833
  @app.post("/generate_ticket_desc")
834
  async def generate_ticket_desc_ep(input_data: TicketDescInput):
835
  try:
@@ -867,7 +890,6 @@ async def generate_ticket_desc_ep(input_data: TicketDescInput):
867
  except Exception as e:
868
  raise HTTPException(status_code=500, detail=str(e))
869
 
870
-
871
  @app.post("/incident_status")
872
  async def incident_status(input_data: TicketStatusInput):
873
  try:
 
10
  from pydantic import BaseModel
11
  from dotenv import load_dotenv
12
  from datetime import datetime
13
+
14
  # KB services (Chroma + sentence-transformers + BM25 hybrid)
15
  from services.kb_creation import (
16
  collection,
17
  ingest_documents,
18
+ hybrid_search_knowledge_base, # intent-aware hybrid
19
  )
20
 
21
  # Optional routers/utilities you already have
 
68
  prev_status: Optional[str] = None # "NO_KB_MATCH" | "PARTIAL" | "OK" | None
69
  last_issue: Optional[str] = None
70
 
 
71
# Request body for POST /incident (consumed by raise_incident).
class IncidentInput(BaseModel):
    short_description: str  # required
    description: str  # required
    mark_resolved: Optional[bool] = False  # optional flag; defaults to False when omitted
75
 
 
76
# Request body for POST /generate_ticket_desc (consumed by generate_ticket_desc_ep).
class TicketDescInput(BaseModel):
    issue: str  # required; presumably the user's free-text issue -- confirm against the endpoint body
78
 
 
79
# Request body for POST /incident_status (consumed by incident_status).
# Both identifiers are optional at the schema level; how a request carrying
# neither is handled is not visible in this excerpt.
class TicketStatusInput(BaseModel):
    sys_id: Optional[str] = None
    number: Optional[str] = None  # IncidentID (incident number)
82
 
 
83
  # ✅ Human‑readable mapping for ServiceNow incident state codes
84
  STATE_MAP = {
85
  "1": "New",
 
165
  "best_combined": best_combined,
166
  }
167
 
 
168
  def _strip_any_source_lines(text: str) -> str:
169
  lines = text.splitlines()
170
  kept = []
 
174
  kept.append(ln)
175
  return "\n".join(kept).strip()
176
 
 
177
  def _build_clarifying_message() -> str:
178
  return (
179
  "I couldn’t find matching content in the KB yet. To help me narrow it down, please share:\n\n"
 
187
 
188
  # ---------- Intent helpers ----------
189
  def _build_tracking_descriptions(issue_text: str, resolved_text: str) -> tuple[str, str]:
 
 
 
 
190
  issue = (issue_text or "").strip()
191
  resolved = (resolved_text or "").strip()
192
  short_desc = issue[:100] if issue else (resolved[:100] or "Issue resolved (user confirmation)")
 
197
  ).strip()
198
  return short_desc, long_desc
199
 
 
200
  def _is_incident_intent(msg_norm: str) -> bool:
201
  intent_phrases = [
202
  "create ticket", "create a ticket", "raise ticket", "raise a ticket", "open ticket", "open a ticket",
 
206
  ]
207
  return any(p in msg_norm for p in intent_phrases)
208
 
 
209
  def _is_feedback_message(msg_norm: str) -> bool:
210
  feedback_phrases = [
211
  "issue not resolved", "not resolved", "still not working",
 
214
  ]
215
  return any(p in msg_norm for p in feedback_phrases)
216
 
 
217
  def _parse_ticket_status_intent(msg_norm: str) -> Dict[str, Optional[str]]:
218
  status_keywords = [
219
  "status", "ticket status", "incident status",
 
233
  return {"number": val.upper() if val.lower().startswith("inc") else val}
234
  return {"number": None, "ask_number": True}
235
 
 
236
  def _is_resolution_ack_heuristic(msg_norm: str) -> bool:
237
  phrases = [
238
  "it is resolved", "resolved", "issue resolved", "problem resolved",
 
241
  ]
242
  return any(p in msg_norm for p in phrases)
243
 
 
244
  def _has_negation_resolved(msg_norm: str) -> bool:
245
  neg_phrases = [
246
  "not resolved", "issue not resolved", "still not working", "not working",
 
248
  ]
249
  return any(p in msg_norm for p in neg_phrases)
250
 
 
251
  def _classify_resolution_llm(user_message: str) -> bool:
252
  if not GEMINI_API_KEY:
253
  return False
 
273
  except Exception:
274
  return False
275
 
 
276
  def _is_generic_issue(msg_norm: str) -> bool:
 
 
 
 
277
  generic_phrases = [
278
  "issue", "have an issue", "having an issue", "got an issue",
279
  "problem", "have a problem", "help", "need help", "support",
 
287
  MAX_SENTENCES_STRICT = 4 # limit for exact-mode
288
  MAX_SENTENCES_CONCISE = 3 # limit for partial-mode
289
 
 
290
  def _normalize_for_match(text: str) -> str:
291
  t = (text or "").lower()
292
  t = re.sub(r"[^\w\s]", " ", t) # remove punctuation
293
  t = re.sub(r"\s+", " ", t).strip() # collapse spaces
294
  return t
295
 
 
296
  def _split_sentences(ctx: str) -> list[str]:
297
  # crude sentence split: punctuation/newlines/bullets/dashes
298
  raw_sents = re.split(r"(?<=[.!?])\s+|\n+|•\s*|-\s*", ctx or "")
299
  return [s.strip() for s in raw_sents if s and len(s.strip()) > 2]
300
 
 
301
  def _filter_context_for_query(context: str, query: str) -> tuple[str, dict]:
302
  """
303
  Returns (filtered_text, info) where filtered_text is:
 
352
  'all_sentences': len(sentences)
353
  }
354
 
355
+ # ---------- NEW: intent-specific line extractors (steps/navigation/errors) ----------
356
+ STEP_LINE_REGEX = re.compile(r"^\s*(?:\d+\.\s+|[•\-]\s+)")
357
+ NAV_LINE_REGEX = re.compile(r"(navigate\s+to|>\s*)", re.IGNORECASE)
358
+
359
+ def _extract_steps_only(text: str, max_lines: int = 12) -> str:
360
+ """
361
+ Keep only numbered/bulleted lines in original order.
362
+ Accepts formats like '1. ...', '2. ...', '• ...', '- ...'.
363
+ """
364
+ lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
365
+ kept = []
366
+ for ln in lines:
367
+ if STEP_LINE_REGEX.match(ln):
368
+ kept.append(ln)
369
+ if len(kept) >= max_lines:
370
+ break
371
+ return "\n".join(kept).strip() if kept else (text or "").strip()
372
+
373
+ def _extract_navigation_only(text: str, max_lines: int = 6) -> str:
374
+ lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
375
+ kept = []
376
+ for ln in lines:
377
+ if NAV_LINE_REGEX.search(ln) or ln.lower().startswith("log in"):
378
+ kept.append(ln)
379
+ if len(kept) >= max_lines:
380
+ break
381
+ return "\n".join(kept).strip() if kept else (text or "").strip()
382
+
383
+ def _extract_errors_only(text: str, max_lines: int = 10) -> str:
384
+ lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
385
+ kept = []
386
+ for ln in lines:
387
+ if STEP_LINE_REGEX.match(ln) or ln.lower().startswith(("error", "resolution", "fix", "verify", "check")):
388
+ kept.append(ln)
389
+ if len(kept) >= max_lines:
390
+ break
391
+ return "\n".join(kept).strip() if kept else (text or "").strip()
392
+
393
  # ---------- Health ----------
394
  @app.get("/")
395
  async def health_check():
 
607
  kb_ctx = extract_kb_context(kb_results, top_chunks=2)
608
  context_raw = kb_ctx.get("context", "") or ""
609
 
610
+ # Filter to exact/concise and always preserve original order of matched sentences
611
  filtered_text, filt_info = _filter_context_for_query(context_raw, input_data.user_message)
612
  context = filtered_text
613
  context_found = bool(kb_ctx.get("context_found", False)) and bool(context.strip())
614
  best_distance = kb_ctx.get("best_score") # lower = better
615
  best_combined = kb_ctx.get("best_combined") # higher = better
616
+ detected_intent = kb_results.get("user_intent", "neutral")
617
+
618
+ # Intent-shaped extraction (steps/navigation/errors)
619
+ q = (input_data.user_message or "").lower()
620
+ if detected_intent == "steps" or any(k in q for k in ["steps", "procedure", "perform", "do", "process"]):
621
+ context = _extract_steps_only(context, max_lines=12)
622
+ elif detected_intent == "errors" or any(k in q for k in ["error", "issue", "fail", "not working", "resolution", "fix"]):
623
+ context = _extract_errors_only(context, max_lines=10)
624
+ elif any(k in q for k in ["navigate", "navigation", "menu", "screen"]):
625
+ context = _extract_navigation_only(context, max_lines=6)
626
+ # else: leave context as-is (concise filter already applied)
627
 
628
  # Dynamic gating
629
  short_query = len((input_data.user_message or "").split()) <= 4
 
656
  # We have KB context → LLM rewrite (KB‑only, no Source lines)
657
  threshold_ok = gate_combined_ok
658
  mode_note = (
659
+ "Return ONLY the matched lines from the context in the same order."
 
660
  if filt_info.get("mode") == "exact" else
661
+ "Return a short, meaningful snippet strictly based on the context."
662
  )
663
 
664
  enhanced_prompt = (
665
+ "From the provided context, output only the actionable steps/procedure relevant to the user's question. "
666
+ "Use ONLY the provided context; do NOT add information that is not present. "
667
  f"{mode_note} "
668
+ "Do NOT include any document names, section titles, or 'Source:' lines.\n\n"
669
  f"### Context\n{context}\n\n"
670
  f"### Question\n{input_data.user_message}\n\n"
671
  "### Output\n"
672
+ "- Return numbered/bulleted steps only, in the same order.\n"
673
+ "- If context is insufficient, add: 'This may be partial based on available KB.'\n"
674
  )
675
 
676
  headers = {"Content-Type": "application/json"}
 
694
  bot_text = ""
695
 
696
  if not bot_text.strip():
697
+ # Fallback to the filtered/intent-shaped context (never the full SOP chunk)
698
  bot_text = context
699
  bot_text = _strip_any_source_lines(bot_text).strip()
700
 
 
727
  "http_status": getattr(resp, "status_code", 0),
728
  "filter_mode": filt_info.get("mode"),
729
  "matched_count": filt_info.get("matched_count"),
730
+ "user_intent": detected_intent,
731
  },
732
  }
733
 
 
827
  print(f"[SN PATCH resolve] exception={str(e)}")
828
  return False
829
 
 
830
  @app.post("/incident")
831
  async def raise_incident(input_data: IncidentInput):
832
  try:
 
853
  except Exception as e:
854
  raise HTTPException(status_code=500, detail=str(e))
855
 
 
856
  @app.post("/generate_ticket_desc")
857
  async def generate_ticket_desc_ep(input_data: TicketDescInput):
858
  try:
 
890
  except Exception as e:
891
  raise HTTPException(status_code=500, detail=str(e))
892
 
 
893
  @app.post("/incident_status")
894
  async def incident_status(input_data: TicketStatusInput):
895
  try: