Update conversation_logic.py
Browse files- conversation_logic.py +77 -15
conversation_logic.py
CHANGED
|
@@ -266,12 +266,15 @@ def _is_direct_solve_request(text: str, intent: str) -> bool:
|
|
| 266 |
def should_retrieve(intent: str, solved: bool, raw_user_text: str, category: Optional[str] = None) -> bool:
|
| 267 |
normalized_category = normalize_category(category)
|
| 268 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
if _is_direct_solve_request(raw_user_text, intent):
|
| 270 |
return (not solved) and normalized_category in {"Verbal", "DataInsight"}
|
| 271 |
|
| 272 |
-
if intent in RETRIEVAL_ALLOWED_INTENTS:
|
| 273 |
-
return True
|
| 274 |
-
|
| 275 |
if not solved and normalized_category in {"Verbal", "DataInsight"}:
|
| 276 |
return True
|
| 277 |
|
|
@@ -526,33 +529,88 @@ def _is_bad_generated_reply(text: str, user_text: str = "") -> bool:
|
|
| 526 |
|
| 527 |
return False
|
| 528 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 529 |
def _pick_teaching_line(
|
| 530 |
chunks: List[RetrievedChunk],
|
| 531 |
current_reply: str,
|
|
|
|
| 532 |
) -> Optional[str]:
|
| 533 |
if not chunks:
|
| 534 |
return None
|
| 535 |
|
| 536 |
reply_keywords = _extract_keywords(current_reply)
|
|
|
|
| 537 |
best_line = None
|
| 538 |
-
best_score = -
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 539 |
|
| 540 |
for chunk in chunks:
|
| 541 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 542 |
if not text:
|
| 543 |
continue
|
| 544 |
-
if len(text) > 180:
|
| 545 |
-
text = text[:177].rstrip() + "..."
|
| 546 |
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 550 |
|
| 551 |
-
score = novelty - 0.3 * overlap
|
| 552 |
if score > best_score:
|
| 553 |
best_score = score
|
| 554 |
best_line = text
|
| 555 |
|
|
|
|
|
|
|
|
|
|
| 556 |
return best_line
|
| 557 |
|
| 558 |
class ConversationEngine:
|
|
@@ -676,10 +734,14 @@ class ConversationEngine:
|
|
| 676 |
result.used_retrieval = True
|
| 677 |
result.teaching_chunks = filtered
|
| 678 |
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 683 |
|
| 684 |
should_try_generator = (
|
| 685 |
self.generator is not None
|
|
|
|
| 266 |
def should_retrieve(intent: str, solved: bool, raw_user_text: str, category: Optional[str] = None) -> bool:
|
| 267 |
normalized_category = normalize_category(category)
|
| 268 |
|
| 269 |
+
if intent in {"walkthrough", "step_by_step", "method", "explain", "concept", "definition", "instruction"}:
|
| 270 |
+
return True
|
| 271 |
+
|
| 272 |
+
if intent == "hint":
|
| 273 |
+
return solved is False or normalized_category in {"Quantitative", "Verbal", "DataInsight"}
|
| 274 |
+
|
| 275 |
if _is_direct_solve_request(raw_user_text, intent):
|
| 276 |
return (not solved) and normalized_category in {"Verbal", "DataInsight"}
|
| 277 |
|
|
|
|
|
|
|
|
|
|
| 278 |
if not solved and normalized_category in {"Verbal", "DataInsight"}:
|
| 279 |
return True
|
| 280 |
|
|
|
|
| 529 |
|
| 530 |
return False
|
| 531 |
|
| 532 |
+
def _clean_teaching_text(text: str) -> str:
    """Turn a retrieved chunk into a short, single-line teaching snippet.

    Newlines are replaced by spaces, the result is passed through
    ``normalize_spaces`` (defined elsewhere in this module; presumably it
    collapses whitespace runs -- confirm), leading list markers are removed,
    and the text is capped at 160 characters with a trailing ``...``.

    Args:
        text: Raw chunk text. ``None`` or an empty string is treated as "".

    Returns:
        The cleaned snippet, possibly empty.
    """
    text = normalize_spaces((text or "").replace("\n", " ").strip())

    # Strip only genuine list markers: a bullet, or "N." / "N)" followed by
    # whitespace (or end of text). The earlier character-class version removed
    # any leading run of digits/dots/dashes, which corrupted content such as
    # "25% of 80 is 20" or "-5 is negative".
    text = re.sub(
        r"^\s*(?:(?:•+\s*|[-*]+(?:\s+|$)|\d+[.)](?:\s+|$)))*",
        "",
        text,
    )

    # Hard cap: keep 157 characters plus the ellipsis (160 total).
    if len(text) > 160:
        text = text[:157].rstrip() + "..."
    return text
|
| 538 |
+
|
| 539 |
+
|
| 540 |
def _pick_teaching_line(
    chunks: List[RetrievedChunk],
    current_reply: str,
    question_text: str = "",
) -> Optional[str]:
    """Pick the retrieved chunk that best adds a "key idea" to the reply.

    Each chunk's text is cleaned with ``_clean_teaching_text`` and scored.
    The score rewards keywords that appear in neither the current reply nor
    the question, rewards principle-style wording, and penalizes chunks that
    mostly repeat the reply or are very short.

    Args:
        chunks: Retrieved chunks; only each chunk's ``text`` attribute is read.
        current_reply: The reply composed so far.
        question_text: The user's question. Optional, so callers using the
            earlier two-argument form keep working.

    Returns:
        The cleaned text of the highest-scoring chunk, or ``None`` when
        ``chunks`` is empty, no chunk yields any text, or the best score is
        below 1.0.
    """
    if not chunks:
        return None

    reply_keywords = _extract_keywords(current_reply or "")
    question_keywords = _extract_keywords(question_text or "")

    # Loop invariants, built once rather than per chunk.
    principle_markers = (
        "remember", "key", "idea", "rule", "method", "approach", "strategy",
        "useful", "helps", "means", "convert", "rewrite", "isolate", "check",
    )
    # Match markers as whole words (allowing simple inflections such as
    # "rules" or "converting"). Plain substring matching also fired on
    # unrelated words like "monkey", "ruler" or "ideal".
    marker_pattern = re.compile(
        r"\b(?:" + "|".join(principle_markers) + r")(?:s|es|d|ed|ing)?\b"
    )
    principle_phrases = (
        "convert percent to decimal",
        "undo operations in reverse order",
        "set up an equation",
        "part-whole relationship",
        "isolate the variable",
    )

    best_line = None
    best_score = float("-inf")

    for chunk in chunks:
        raw_text = (chunk.text or "").strip()
        if not raw_text:
            continue

        text = _clean_teaching_text(raw_text)
        if not text:
            continue

        lower_text = text.lower()
        chunk_keywords = _extract_keywords(lower_text)

        novelty_vs_reply = len(chunk_keywords - reply_keywords)
        novelty_vs_question = len(chunk_keywords - question_keywords)
        overlap_with_reply = len(chunk_keywords & reply_keywords)
        overlap_with_question = len(chunk_keywords & question_keywords)

        principle_bonus = 0.0
        if marker_pattern.search(lower_text):
            principle_bonus += 1.5
        if any(phrase in lower_text for phrase in principle_phrases):
            principle_bonus += 2.0

        # Extra penalty when the chunk shares at least three keywords with the
        # reply and the shared keywords outnumber the new ones.
        too_similar_penalty = 0.0
        if overlap_with_reply >= max(3, novelty_vs_reply + 1):
            too_similar_penalty += 2.0

        score = (
            1.8 * novelty_vs_reply
            + 1.0 * novelty_vs_question
            + principle_bonus
            - 0.5 * overlap_with_question
            - 0.8 * overlap_with_reply
            - too_similar_penalty
        )

        # Snippets under five words are penalized.
        if len(text.split()) < 5:
            score -= 2.0

        if score > best_score:
            best_score = score
            best_line = text

    # Below this threshold, return nothing rather than a weak line.
    if best_score < 1.0:
        return None

    return best_line
|
| 615 |
|
| 616 |
class ConversationEngine:
|
|
|
|
| 734 |
result.used_retrieval = True
|
| 735 |
result.teaching_chunks = filtered
|
| 736 |
|
| 737 |
+
if selected_chunks and resolved_help_mode in {"walkthrough", "step_by_step", "method", "explain", "concept"}:
|
| 738 |
+
teaching_line = _pick_teaching_line(
|
| 739 |
+
chunks=selected_chunks,
|
| 740 |
+
current_reply=reply,
|
| 741 |
+
question_text=solver_input,
|
| 742 |
+
)
|
| 743 |
+
if teaching_line:
|
| 744 |
+
reply = f"{reply}\n\nKey idea: {teaching_line}"
|
| 745 |
|
| 746 |
should_try_generator = (
|
| 747 |
self.generator is not None
|