j-js committed on
Commit
90ad83b
·
verified ·
1 Parent(s): 4d6cf46

Update conversation_logic.py

Browse files
Files changed (1) hide show
  1. conversation_logic.py +77 -15
conversation_logic.py CHANGED
@@ -266,12 +266,15 @@ def _is_direct_solve_request(text: str, intent: str) -> bool:
266
  def should_retrieve(intent: str, solved: bool, raw_user_text: str, category: Optional[str] = None) -> bool:
267
  normalized_category = normalize_category(category)
268
 
 
 
 
 
 
 
269
  if _is_direct_solve_request(raw_user_text, intent):
270
  return (not solved) and normalized_category in {"Verbal", "DataInsight"}
271
 
272
- if intent in RETRIEVAL_ALLOWED_INTENTS:
273
- return True
274
-
275
  if not solved and normalized_category in {"Verbal", "DataInsight"}:
276
  return True
277
 
@@ -526,33 +529,88 @@ def _is_bad_generated_reply(text: str, user_text: str = "") -> bool:
526
 
527
  return False
528
 
 
 
 
 
 
 
 
 
529
  def _pick_teaching_line(
530
  chunks: List[RetrievedChunk],
531
  current_reply: str,
 
532
  ) -> Optional[str]:
533
  if not chunks:
534
  return None
535
 
536
  reply_keywords = _extract_keywords(current_reply)
 
537
  best_line = None
538
- best_score = -1
 
 
 
 
 
539
 
540
  for chunk in chunks:
541
- text = (chunk.text or "").strip().replace("\n", " ")
 
 
 
 
542
  if not text:
543
  continue
544
- if len(text) > 180:
545
- text = text[:177].rstrip() + "..."
546
 
547
- chunk_keywords = _extract_keywords(text)
548
- novelty = len(chunk_keywords - reply_keywords)
549
- overlap = len(chunk_keywords & reply_keywords)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
550
 
551
- score = novelty - 0.3 * overlap
552
  if score > best_score:
553
  best_score = score
554
  best_line = text
555
 
 
 
 
556
  return best_line
557
 
558
  class ConversationEngine:
@@ -676,10 +734,14 @@ class ConversationEngine:
676
  result.used_retrieval = True
677
  result.teaching_chunks = filtered
678
 
679
- if selected_chunks and resolved_help_mode in {"walkthrough", "step_by_step", "method", "explain", "concept"}:
680
- teaching_line = _pick_teaching_line(selected_chunks, reply)
681
- if teaching_line:
682
- reply = f"{reply}\n\nKey idea: {teaching_line}"
 
 
 
 
683
 
684
  should_try_generator = (
685
  self.generator is not None
 
266
  def should_retrieve(intent: str, solved: bool, raw_user_text: str, category: Optional[str] = None) -> bool:
267
  normalized_category = normalize_category(category)
268
 
269
+ if intent in {"walkthrough", "step_by_step", "method", "explain", "concept", "definition", "instruction"}:
270
+ return True
271
+
272
+ if intent == "hint":
273
+ return solved is False or normalized_category in {"Quantitative", "Verbal", "DataInsight"}
274
+
275
  if _is_direct_solve_request(raw_user_text, intent):
276
  return (not solved) and normalized_category in {"Verbal", "DataInsight"}
277
 
 
 
 
278
  if not solved and normalized_category in {"Verbal", "DataInsight"}:
279
  return True
280
 
 
529
 
530
  return False
531
 
532
+ def _clean_teaching_text(text: str) -> str:
533
+ text = normalize_spaces((text or "").replace("\n", " ").strip())
534
+ text = re.sub(r"^[\-\•\*\d\.\)\s]+", "", text)
535
+ if len(text) > 160:
536
+ text = text[:157].rstrip() + "..."
537
+ return text
538
+
539
+
540
  def _pick_teaching_line(
541
  chunks: List[RetrievedChunk],
542
  current_reply: str,
543
+ question_text: str,
544
  ) -> Optional[str]:
545
  if not chunks:
546
  return None
547
 
548
  reply_keywords = _extract_keywords(current_reply)
549
+ question_keywords = _extract_keywords(question_text)
550
  best_line = None
551
+ best_score = float("-inf")
552
+
553
+ principle_markers = {
554
+ "remember", "key", "idea", "rule", "method", "approach", "strategy",
555
+ "useful", "helps", "means", "convert", "rewrite", "isolate", "check",
556
+ }
557
 
558
  for chunk in chunks:
559
+ raw_text = (chunk.text or "").strip()
560
+ if not raw_text:
561
+ continue
562
+
563
+ text = _clean_teaching_text(raw_text)
564
  if not text:
565
  continue
 
 
566
 
567
+ lower_text = text.lower()
568
+ chunk_keywords = _extract_keywords(lower_text)
569
+
570
+ novelty_vs_reply = len(chunk_keywords - reply_keywords)
571
+ novelty_vs_question = len(chunk_keywords - question_keywords)
572
+ overlap_with_reply = len(chunk_keywords & reply_keywords)
573
+ overlap_with_question = len(chunk_keywords & question_keywords)
574
+
575
+ principle_bonus = 0.0
576
+ if any(marker in lower_text for marker in principle_markers):
577
+ principle_bonus += 1.5
578
+
579
+ if any(
580
+ phrase in lower_text
581
+ for phrase in [
582
+ "convert percent to decimal",
583
+ "undo operations in reverse order",
584
+ "set up an equation",
585
+ "part-whole relationship",
586
+ "isolate the variable",
587
+ ]
588
+ ):
589
+ principle_bonus += 2.0
590
+
591
+ too_similar_penalty = 0.0
592
+ if overlap_with_reply >= max(3, novelty_vs_reply + 1):
593
+ too_similar_penalty += 2.0
594
+
595
+ score = (
596
+ 1.8 * novelty_vs_reply
597
+ + 1.0 * novelty_vs_question
598
+ + principle_bonus
599
+ - 0.5 * overlap_with_question
600
+ - 0.8 * overlap_with_reply
601
+ - too_similar_penalty
602
+ )
603
+
604
+ if len(text.split()) < 5:
605
+ score -= 2.0
606
 
 
607
  if score > best_score:
608
  best_score = score
609
  best_line = text
610
 
611
+ if best_score < 1.0:
612
+ return None
613
+
614
  return best_line
615
 
616
  class ConversationEngine:
 
734
  result.used_retrieval = True
735
  result.teaching_chunks = filtered
736
 
737
+ if selected_chunks and resolved_help_mode in {"walkthrough", "step_by_step", "method", "explain", "concept"}:
738
+ teaching_line = _pick_teaching_line(
739
+ chunks=selected_chunks,
740
+ current_reply=reply,
741
+ question_text=solver_input,
742
+ )
743
+ if teaching_line:
744
+ reply = f"{reply}\n\nKey idea: {teaching_line}"
745
 
746
  should_try_generator = (
747
  self.generator is not None