srilakshu012456 committed on
Commit
4c40701
·
verified ·
1 Parent(s): aa65b5b

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +52 -4
main.py CHANGED
@@ -353,21 +353,68 @@ def _filter_context_for_query(context: str, query: str) -> tuple[str, dict]:
353
  }
354
 
355
  # ---------- NEW: intent-specific line extractors (steps/navigation/errors) ----------
356
- STEP_LINE_REGEX = re.compile(r"^\s*(?:\d+\.\s+|[•\-]\s+)")
 
357
  NAV_LINE_REGEX = re.compile(r"(navigate\s+to|>\s*)", re.IGNORECASE)
358
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
  def _extract_steps_only(text: str, max_lines: int = 12) -> str:
360
  """
361
- Keep only numbered/bulleted lines in original order.
362
- Accepts formats like '1. ...', '2. ...', '• ...', '- ...'.
363
  """
364
  lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
365
  kept = []
366
  for ln in lines:
367
- if STEP_LINE_REGEX.match(ln):
368
  kept.append(ln)
369
  if len(kept) >= max_lines:
370
  break
 
371
  return "\n".join(kept).strip() if kept else (text or "").strip()
372
 
373
  def _extract_navigation_only(text: str, max_lines: int = 6) -> str:
@@ -384,6 +431,7 @@ def _extract_errors_only(text: str, max_lines: int = 10) -> str:
384
  lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
385
  kept = []
386
  for ln in lines:
 
387
  if STEP_LINE_REGEX.match(ln) or ln.lower().startswith(("error", "resolution", "fix", "verify", "check")):
388
  kept.append(ln)
389
  if len(kept) >= max_lines:
 
353
  }
354
 
355
  # ---------- NEW: intent-specific line extractors (steps/navigation/errors) ----------
356
+
357
+ STEP_LINE_REGEX = re.compile(r"^\s*(?:\d+[\.\)]\s+|[•\-]\s+)", re.IGNORECASE)
358
  NAV_LINE_REGEX = re.compile(r"(navigate\s+to|>\s*)", re.IGNORECASE)
359
 
360
+ # Common imperative verbs across SOPs (add more if you want, optional)
361
+ PROCEDURE_VERBS = [
362
+ "log in", "select", "scan", "verify", "confirm", "print",
363
+ "move", "complete", "click", "open", "navigate", "choose",
364
+ "enter", "update", "save", "delete", "create", "attach", "assign"
365
+ ]
366
+ VERB_START_REGEX = re.compile(r"^\s*(?:" + "|".join([re.escape(v) for v in PROCEDURE_VERBS]) + r")\b", re.IGNORECASE)
367
+
368
+ # Lines that clearly are NOT steps when user intent is 'steps'
369
+ NON_PROCEDURAL_STARTS = [
370
+ "to ensure", "as per", "purpose", "pre-requisites", "prerequisites", "overview", "introduction"
371
+ ]
372
+ NON_PROC_REGEX = re.compile(r"^\s*(?:" + "|".join([re.escape(v) for v in NON_PROCEDURAL_STARTS]) + r")\b", re.IGNORECASE)
373
+
374
+ def _is_procedural_line(ln: str) -> bool:
375
+ """
376
+ A line is procedural if:
377
+ - it starts with a number/bullet, OR
378
+ - it starts with an imperative verb (Log in, Select, Scan, etc.)
379
+ and it does not look like Purpose/Pre-Requisites/Overview.
380
+ Bullets are kept only if they contain an action verb (to avoid prereq bullets).
381
+ """
382
+ s = (ln or "").strip()
383
+ if not s:
384
+ return False
385
+ # Exclude clearly non-procedural lines
386
+ if NON_PROC_REGEX.match(s):
387
+ return False
388
+
389
+ # Numbered/bulleted lines
390
+ if STEP_LINE_REGEX.match(s):
391
+ # Keep bullet only if an action verb appears somewhere in the line
392
+ if s.lstrip().startswith(("•", "-")):
393
+ return bool(VERB_START_REGEX.search(s))
394
+ return True
395
+
396
+ # Imperative verb lines (covers Word lists where 1. isn't part of the text)
397
+ if VERB_START_REGEX.match(s):
398
+ return True
399
+
400
+ # Allow navigation lines (even if not numbered)
401
+ if NAV_LINE_REGEX.search(s):
402
+ return True
403
+
404
+ return False
405
+
406
  def _extract_steps_only(text: str, max_lines: int = 12) -> str:
407
  """
408
+ Keep only procedural lines (numbered/bulleted or imperative verb starts) in original order.
 
409
  """
410
  lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
411
  kept = []
412
  for ln in lines:
413
+ if _is_procedural_line(ln):
414
  kept.append(ln)
415
  if len(kept) >= max_lines:
416
  break
417
+ # If nothing matched (rare), return the original “concise” filtered text
418
  return "\n".join(kept).strip() if kept else (text or "").strip()
419
 
420
  def _extract_navigation_only(text: str, max_lines: int = 6) -> str:
 
431
  lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
432
  kept = []
433
  for ln in lines:
434
+ # Keep error/resolution bullets or imperative fixes (verify, check, etc.)
435
  if STEP_LINE_REGEX.match(ln) or ln.lower().startswith(("error", "resolution", "fix", "verify", "check")):
436
  kept.append(ln)
437
  if len(kept) >= max_lines: