Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -312,57 +312,56 @@ def _soft_match_score(a: str, b: str) -> float:
|
|
| 312 |
# combine (weights tuned to prefer phrase overlaps slightly)
|
| 313 |
return min(1.0, 0.65 * jacc + 0.45 * big)
|
| 314 |
|
| 315 |
-
|
| 316 |
def _detect_next_intent(user_query: str) -> bool:
    """
    Report whether the query reads like a "what comes next?" request.

    The query is passed through _norm_text and the result is checked for
    each cue phrase below using plain substring containment, so a cue also
    fires when it occurs inside a longer word.
    """
    cue_phrases = (
        'after', 'after this', 'what next', 'whats next', 'next step',
        'then what', 'following step', 'continue', 'subsequent', 'proceed',
        'what to do', 'what should i do', 'how to proceed', 'how do i continue',
        'proceed further', 'next?',
    )
    normalized = _norm_text(user_query)
    for phrase in cue_phrases:
        if phrase in normalized:
            return True
    return False
|
| 325 |
|
| 326 |
-
|
| 327 |
def _resolve_next_steps(user_query: str, numbered_text: str, max_next: int = 8, min_score: float = 0.25):
|
| 328 |
"""
|
| 329 |
Robust next-step resolver:
|
| 330 |
1) Detect 'what next' intent.
|
| 331 |
2) Stem & match query against each step using tokens + bigrams + synonyms.
|
| 332 |
3) If a good anchor is found, return ONLY subsequent steps (window=max_next).
|
| 333 |
-
|
| 334 |
"""
|
| 335 |
if not _detect_next_intent(user_query):
|
| 336 |
return None
|
| 337 |
-
|
| 338 |
steps = _split_sop_into_steps(numbered_text)
|
| 339 |
if not steps:
|
| 340 |
return None
|
| 341 |
-
|
| 342 |
q_norm = _norm_text(user_query)
|
| 343 |
q_tokens = [t for t in q_norm.split() if len(t) > 1]
|
| 344 |
-
|
| 345 |
best_idx, best_score = -1, -1.0
|
| 346 |
for idx, step in enumerate(steps):
|
| 347 |
-
# base fuzzy score
|
| 348 |
s1 = _soft_match_score(user_query, step)
|
| 349 |
-
# synonym hits
|
| 350 |
syn = _syn_hits(q_tokens, step)
|
| 351 |
-
# combined score (synonyms are discrete)
|
| 352 |
score = s1 + 0.12 * syn
|
| 353 |
if score > best_score:
|
| 354 |
best_score, best_idx = score, idx
|
| 355 |
-
|
| 356 |
-
# Looser threshold to accept anchors with synonyms / tense differences
|
| 357 |
if best_idx < 0 or best_score < min_score:
|
| 358 |
-
return None
|
| 359 |
-
|
| 360 |
start = best_idx + 1
|
| 361 |
if start >= len(steps):
|
| 362 |
-
return []
|
| 363 |
-
|
| 364 |
end = min(start + max_next, len(steps))
|
| 365 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
|
| 367 |
def _syn_hits(q_tokens: List[str], step_line: str) -> int:
|
| 368 |
"""
|
|
@@ -936,43 +935,38 @@ async def chat_with_ai(input_data: ChatInput):
|
|
| 936 |
context_preformatted = False
|
| 937 |
|
| 938 |
if best_doc and detected_intent == "steps":
|
| 939 |
-
|
| 940 |
-
|
| 941 |
-
|
| 942 |
-
|
| 943 |
-
|
| 944 |
-
|
| 945 |
-
full_steps = get_section_text(best_doc, sec)
|
| 946 |
-
else:
|
| 947 |
-
full_steps = get_best_steps_section_text(best_doc)
|
| 948 |
-
if not full_steps and sec:
|
| 949 |
-
full_steps = get_section_text(best_doc, sec)
|
| 950 |
if full_steps:
|
| 951 |
-
|
| 952 |
-
|
| 953 |
input_data.user_message,
|
| 954 |
numbered_full,
|
| 955 |
max_next=6,
|
| 956 |
-
min_score=0.35
|
| 957 |
-
|
| 958 |
-
|
| 959 |
-
|
| 960 |
-
|
| 961 |
-
|
| 962 |
-
|
| 963 |
-
|
| 964 |
-
|
| 965 |
-
|
| 966 |
-
|
| 967 |
-
|
| 968 |
-
|
| 969 |
-
|
| 970 |
-
|
| 971 |
-
|
| 972 |
-
|
| 973 |
-
|
| 974 |
-
|
| 975 |
-
|
| 976 |
|
| 977 |
elif best_doc and detected_intent == "errors":
|
| 978 |
full_errors = get_best_errors_section_text(best_doc)
|
|
|
|
| 312 |
# combine (weights tuned to prefer phrase overlaps slightly)
|
| 313 |
return min(1.0, 0.65 * jacc + 0.45 * big)
|
| 314 |
|
|
|
|
| 315 |
def _detect_next_intent(user_query: str) -> bool:
    """
    Report whether the query reads like a "what comes next?" request.

    The query is passed through _norm_text and the result is checked for
    each cue phrase below using plain substring containment, so a cue also
    fires when it occurs inside a longer word.
    """
    cue_phrases = (
        'after', 'after this', 'what next', 'whats next', 'next step',
        'then what', 'following step', 'continue', 'subsequent', 'proceed',
        'what to do', 'what should i do', 'how to proceed', 'how do i continue',
        'proceed further', 'next?',
    )
    normalized = _norm_text(user_query)
    for phrase in cue_phrases:
        if phrase in normalized:
            return True
    return False
|
| 323 |
|
|
|
|
| 324 |
def _resolve_next_steps(user_query: str, numbered_text: str, max_next: int = 8, min_score: float = 0.25):
    """
    Resolve the steps that follow the step the user says they are on.

    1) Bail out unless the query carries a 'what next' intent.
    2) Score every step against the query (_soft_match_score plus a 0.12
       bonus per _syn_hits hit) and take the best-scoring step as the anchor;
       on ties the earliest step wins.
    3) Return up to max_next distinct steps that come after the anchor.

    Args:
        user_query: raw user message.
        numbered_text: SOP text, split into steps by _split_sop_into_steps.
        max_next: maximum number of following steps to return.
        min_score: minimum combined score for an anchor to be accepted.

    Returns:
        None -> no next-step intent, no steps, or no anchor reached min_score
                (fallback to full SOP rendering).
        []   -> the anchor has no distinct step after it (or max_next <= 0).
        list -> the following steps, in document order, without repeats.
    """
    if not _detect_next_intent(user_query):
        return None

    steps = _split_sop_into_steps(numbered_text)
    if not steps:
        return None

    # Single-character tokens are dropped before synonym matching.
    q_tokens = [t for t in _norm_text(user_query).split() if len(t) > 1]

    best_idx, best_score = -1, -1.0
    for idx, step in enumerate(steps):
        # Fuzzy score plus a discrete bonus per synonym hit.
        score = _soft_match_score(user_query, step) + 0.12 * _syn_hits(q_tokens, step)
        if score > best_score:
            best_score, best_idx = score, idx

    if best_idx < 0 or best_score < min_score:
        return None

    # De-duplicate while walking ALL following steps and stop once the window
    # is full. Cutting the window first (as before) let repeats of the anchor
    # or duplicate steps eat window slots, which could even yield [] and make
    # the caller announce "final step" although later steps exist.
    seen = {_norm_text(steps[best_idx])}  # seeding with the anchor skips its repeats
    following = []
    for step in steps[best_idx + 1:]:
        if len(following) >= max_next:
            break
        key = _norm_text(step)
        if key in seen:
            continue
        seen.add(key)
        following.append(step)
    return following
|
| 365 |
|
| 366 |
def _syn_hits(q_tokens: List[str], step_line: str) -> int:
|
| 367 |
"""
|
|
|
|
| 935 |
context_preformatted = False
|
| 936 |
|
| 937 |
if best_doc and detected_intent == "steps":
|
| 938 |
+
full_steps = get_best_steps_section_text(best_doc)
|
| 939 |
+
if not full_steps:
|
| 940 |
+
sec = (top_meta or {}).get("section")
|
| 941 |
+
if sec:
|
| 942 |
+
full_steps = get_section_text(best_doc, sec)
|
| 943 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 944 |
if full_steps:
|
| 945 |
+
numbered_full = _ensure_numbering(full_steps)
|
| 946 |
+
next_only = _resolve_next_steps(
|
| 947 |
input_data.user_message,
|
| 948 |
numbered_full,
|
| 949 |
max_next=6,
|
| 950 |
+
min_score=0.35
|
| 951 |
+
)
|
| 952 |
+
if next_only is not None:
|
| 953 |
+
if len(next_only) == 0:
|
| 954 |
+
context = "You are at the final step of this SOP. No further steps."
|
| 955 |
+
next_step_applied = True
|
| 956 |
+
next_step_info = {"count": 0}
|
| 957 |
+
context_preformatted = True
|
| 958 |
+
else:
|
| 959 |
+
context = _format_steps_as_numbered(next_only)
|
| 960 |
+
next_step_applied = True
|
| 961 |
+
next_step_info = {"count": len(next_only)}
|
| 962 |
+
context_preformatted = True
|
| 963 |
+
else:
|
| 964 |
+
context = full_steps
|
| 965 |
+
context_preformatted = False
|
| 966 |
+
|
| 967 |
+
# clear filter info for debug clarity
|
| 968 |
+
filt_info = {'mode': None, 'matched_count': None, 'all_sentences': None}
|
| 969 |
+
context_found = True
|
| 970 |
|
| 971 |
elif best_doc and detected_intent == "errors":
|
| 972 |
full_errors = get_best_errors_section_text(best_doc)
|