srilakshu012456 committed on
Commit
2ef7e03
·
verified ·
1 Parent(s): 0384218

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +101 -0
main.py CHANGED
@@ -225,6 +225,107 @@ def _filter_numbered_steps_by_actions(numbered_text: str,
225
  # If over-filtering made it empty, fall back to original text
226
  return "\n".join(out_lines).strip() or (numbered_text or "").strip()
227
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  def _ensure_numbering(text: str) -> str:
229
  """
230
  Normalize raw SOP steps into a clean numbered list using circled digits.
 
225
  # If over-filtering made it empty, fall back to original text
226
  return "\n".join(out_lines).strip() or (numbered_text or "").strip()
227
 
228
+ # --- NEW: extract the anchor clause from user message ---
229
def _extract_anchor_from_query(msg: str) -> dict:
    """
    Split a user message into the clause to match against SOP steps.

    Returns a dict with two keys:
      - 'anchor': the clause that should be matched against SOP steps.
      - 'has_followup': True when the message contains a generic follow-up
        cue ("what next", "what to do", "then", "after that", ...).

    A None or empty message yields an empty anchor.
    Works across any SOP text; no domain words are required.
    """
    raw = (msg or "").strip()
    low = _norm_text(raw)

    # Very small set of generic follow-up cues (no domain synonyms).
    FOLLOWUP_CUES = ("what next", "what is next", "what to do", "then", "after that", "next")

    # Match cues as whole words/phrases. A plain substring test would fire on
    # "then" inside "authentication" or "strengthen" and wrongly report a
    # follow-up question.
    # NOTE(review): assumes _norm_text keeps words separated by whitespace or
    # punctuation -- confirm against its definition.
    cue_pattern = re.compile(
        r"\b(?:"
        + "|".join(
            r"\s+".join(re.escape(word) for word in cue.split())
            for cue in FOLLOWUP_CUES
        )
        + r")\b"
    )

    has_followup = bool(cue_pattern.search(low))

    # Split on common clause separators to isolate the anchor clause.
    # A hyphen only counts as a separator when it is not sitting between two
    # word characters, so compounds such as "sign-in" stay in one piece.
    parts = [
        p.strip()
        for p in re.split(r"[?.,;:\n]+|(?<!\w)-+|-+(?!\w)", raw)
        if p.strip()
    ]
    if not parts:
        return {"anchor": raw, "has_followup": has_followup}

    # If the follow-up cue lives in the last clause ("..., then what next"),
    # the clause before it is the anchor; otherwise the last clause is.
    last = parts[-1]
    if len(parts) >= 2 and cue_pattern.search(_norm_text(last)):
        anchor = parts[-2]
    else:
        anchor = last

    return {"anchor": anchor.strip(), "has_followup": has_followup}
258
+
259
+ # --- Core: keyword-free, anchor-based "next steps" ---
260
def _anchor_next_steps(user_message: str, numbered_text: str, max_next: int = 8) -> list | None:
    """
    Return the SOP steps that follow the step best matching the user's anchor clause.

    The anchor clause is taken from `user_message` via
    `_extract_anchor_from_query`. Each step from `_split_sop_into_steps` is
    scored with `_similarity` on the whole line and on every sentence inside it
    (so an anchor can match a sentence in the middle of a step).

    Args:
        user_message: the raw user query.
        numbered_text: the SOP text to split into steps.
        max_next: maximum number of following steps to return; values below
            zero are treated as zero.

    Returns:
        - None when there are no steps, no anchor, or no sufficiently strong match.
        - [] when the matched step is the final one (nothing follows).
        - Otherwise the non-blank lines of the following steps after
          `_dedupe_lines`; renumbering is left to the caller.
    """
    steps = _split_sop_into_steps(numbered_text)
    if not steps:
        return None

    info = _extract_anchor_from_query(user_message)
    anchor = info.get("anchor", "").strip()
    if not anchor:
        return None
    anchor_norm = _norm_text(anchor)
    has_followup = bool(info.get("has_followup"))

    # Punctuation-insensitive form of the anchor; loop-invariant, so built once.
    a_flat = re.sub(r"\W+", "", anchor_norm)

    candidates = []
    for idx, step_line in enumerate(steps):
        # Score on the full line first.
        sent_scores = [_similarity(anchor, step_line)]
        literal_hit = False

        # Also score each sentence within this step (to match middle sentences).
        for sentence in _split_sentences(step_line):
            sent_scores.append(_similarity(anchor, sentence))
            # Literal containment (punctuation-insensitive). Both sides must be
            # non-empty: an empty string is "contained" in everything, so a
            # punctuation-only sentence would otherwise force acceptance.
            s_flat = re.sub(r"\W+", "", _norm_text(sentence))
            if a_flat and s_flat and (a_flat in s_flat or s_flat in a_flat):
                literal_hit = True

        candidates.append((idx, max(sent_scores), literal_hit))

    # Highest score wins; on an exact score tie prefer the later step
    # (assumes the user has progressed further through the SOP).
    best_idx, best_score, best_literal = max(candidates, key=lambda c: (c[1], c[0]))

    # Dynamic threshold:
    # - literal containment -> accept
    # - follow-up cue       -> lower threshold (0.50 instead of 0.55)
    # - anchors with >= 3 multi-character tokens are accepted from 0.40
    if best_literal:
        accept = True
    else:
        tok_count = sum(1 for tok in anchor_norm.split() if len(tok) > 1)
        base_ok = best_score >= (0.50 if has_followup else 0.55)
        len_ok = best_score >= 0.40 and tok_count >= 3
        accept = base_ok or len_ok

    if not accept:
        return None

    # Start from the step AFTER the matched one.
    start = best_idx + 1
    if start >= len(steps):
        return []  # already at the final step

    # Clamp max_next so a negative value cannot turn into a negative slice end.
    end = min(start + max(0, max_next), len(steps))
    next_steps = steps[start:end]

    # Dedupe in case adjacent chunks contain repeated lines
    # (numbering is applied by the caller).
    return [ln for ln in _dedupe_lines("\n".join(next_steps)).splitlines() if ln.strip()]
328
+
329
  def _ensure_numbering(text: str) -> str:
330
  """
331
  Normalize raw SOP steps into a clean numbered list using circled digits.