Update conversation_logic.py
Browse files- conversation_logic.py +105 -14
conversation_logic.py
CHANGED
|
@@ -222,12 +222,17 @@ def _specific_topic_from_question(question_text: str, fallback_topic: str, class
|
|
| 222 |
return "median"
|
| 223 |
if "range" in q:
|
| 224 |
return "range"
|
|
|
|
|
|
|
| 225 |
if (
|
| 226 |
"ratio" in q
|
| 227 |
or re.search(r"\b[a-z]\s*/\s*[a-z]\b", q)
|
| 228 |
or re.search(r"\b\d+\s*/\s*\d+\b", q)
|
|
|
|
| 229 |
):
|
| 230 |
return "ratio"
|
|
|
|
|
|
|
| 231 |
if topic == "data" and any(k in q for k in ["dataset", "table", "chart", "graph"]):
|
| 232 |
return "statistics"
|
| 233 |
return topic
|
|
@@ -263,6 +268,11 @@ def _build_topic_query_reply(question_text: str, fallback_topic: str, classified
|
|
| 263 |
"- This is a percent question.\n"
|
| 264 |
"- The key skill is identifying the correct base quantity before applying the percent relationship."
|
| 265 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
|
| 267 |
label = specific if specific != "general" else (cat.lower() if cat else "quantitative reasoning")
|
| 268 |
return f"- This looks like a {label} question."
|
|
@@ -635,6 +645,66 @@ def _question_specific_percent_reply(question_text: str) -> str:
|
|
| 635 |
)
|
| 636 |
|
| 637 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 638 |
def _build_question_specific_reply(
|
| 639 |
*,
|
| 640 |
question_text: str,
|
|
@@ -652,9 +722,30 @@ def _build_question_specific_reply(
|
|
| 652 |
if not q:
|
| 653 |
return ""
|
| 654 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 655 |
if any(k in low for k in ["variability", "spread", "standard deviation"]):
|
| 656 |
return _question_specific_variability_reply(options_text)
|
| 657 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 658 |
if topic in {"ratio", "algebra"}:
|
| 659 |
if re.search(r"\b[a-z]\s*/\s*[a-z]\s*=\s*\d+\s*/\s*\d+", low):
|
| 660 |
return _question_specific_ratio_reply(q)
|
|
@@ -680,10 +771,12 @@ def _build_question_specific_reply(
|
|
| 680 |
"- Then focus on the exact expression the question asks for, rather than solving more than you need to."
|
| 681 |
)
|
| 682 |
|
| 683 |
-
if
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
|
|
|
|
|
|
| 687 |
|
| 688 |
return ""
|
| 689 |
|
|
@@ -940,6 +1033,11 @@ class ConversationEngine:
|
|
| 940 |
|
| 941 |
if input_type == "topic_query":
|
| 942 |
support_topic = fallback_pack.get("topic") if fallback_pack else ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 943 |
topic_reply_core = _build_topic_query_reply(
|
| 944 |
question_text=solver_input,
|
| 945 |
fallback_topic=support_topic,
|
|
@@ -953,9 +1051,9 @@ class ConversationEngine:
|
|
| 953 |
transparency=transparency,
|
| 954 |
help_mode="answer",
|
| 955 |
hint_stage=hint_stage,
|
| 956 |
-
topic=
|
| 957 |
)
|
| 958 |
-
result.topic =
|
| 959 |
result.reply = reply
|
| 960 |
result.help_mode = "answer"
|
| 961 |
result.meta["response_source"] = "topic_classifier"
|
|
@@ -993,15 +1091,8 @@ class ConversationEngine:
|
|
| 993 |
|
| 994 |
if fallback_pack and fallback_pack.get("topic") == "statistics":
|
| 995 |
qlow = (solver_input or "").lower()
|
| 996 |
-
wants_topic = input_type == "topic_query"
|
| 997 |
if any(k in qlow for k in ["variability", "spread", "standard deviation"]):
|
| 998 |
-
if
|
| 999 |
-
fallback_reply_core = (
|
| 1000 |
-
"- This is a statistics / data insight question about variability (spread).\n"
|
| 1001 |
-
"- Focus on how spread out each dataset is rather than the average.\n"
|
| 1002 |
-
"- Compare how far the outer values sit from the middle value in each set."
|
| 1003 |
-
)
|
| 1004 |
-
elif resolved_help_mode == "answer":
|
| 1005 |
fallback_reply_core = (
|
| 1006 |
"- Notice this is asking about variability, which means spread, not the mean.\n"
|
| 1007 |
"- Compare how far the smallest and largest values sit from the middle value in each dataset.\n"
|
|
|
|
| 222 |
return "median"
|
| 223 |
if "range" in q:
|
| 224 |
return "range"
|
| 225 |
+
if any(k in q for k in ["probability", "chance", "odds", "at random", "chosen at random"]):
|
| 226 |
+
return "probability"
|
| 227 |
if (
|
| 228 |
"ratio" in q
|
| 229 |
or re.search(r"\b[a-z]\s*/\s*[a-z]\b", q)
|
| 230 |
or re.search(r"\b\d+\s*/\s*\d+\b", q)
|
| 231 |
+
or "proportion" in q
|
| 232 |
):
|
| 233 |
return "ratio"
|
| 234 |
+
if "percent" in q or "%" in q:
|
| 235 |
+
return "percent"
|
| 236 |
if topic == "data" and any(k in q for k in ["dataset", "table", "chart", "graph"]):
|
| 237 |
return "statistics"
|
| 238 |
return topic
|
|
|
|
| 268 |
"- This is a percent question.\n"
|
| 269 |
"- The key skill is identifying the correct base quantity before applying the percent relationship."
|
| 270 |
)
|
| 271 |
+
if specific == "probability":
|
| 272 |
+
return (
|
| 273 |
+
"- This is a probability question.\n"
|
| 274 |
+
"- The key skill is deciding what counts as a successful outcome and then comparing favorable outcomes with the total possible outcomes."
|
| 275 |
+
)
|
| 276 |
|
| 277 |
label = specific if specific != "general" else (cat.lower() if cat else "quantitative reasoning")
|
| 278 |
return f"- This looks like a {label} question."
|
|
|
|
| 645 |
)
|
| 646 |
|
| 647 |
|
| 648 |
+
|
| 649 |
+
def _question_specific_probability_reply(question_text: str, options_text: Optional[List[str]] = None) -> str:
    """Return a short, bulleted first-step hint for a probability question.

    The question text is normalised with ``_clean_text`` and lower-cased, then
    matched against keyword heuristics in this order (first match wins):

    1. Single-draw / container wording -> "favorable over total" guidance,
       with one extra bullet when answer choices were supplied.
    2. "at least" wording -> complement guidance.
    3. A connective word (and / both / then / after) together with
       probability wording -> compound-event guidance.
    4. Otherwise a generic "favorable over total" hint.

    Args:
        question_text: Raw question text as received from the caller.
        options_text: Optional answer choices; only its emptiness is used.

    Returns:
        A newline-separated string of "- " bullet lines.
    """
    q = _clean_text(question_text)
    low = q.lower()

    single_draw_markers = (
        "chosen at random",
        "select one",
        "choose one",
        "one ball",
        "one card",
        "one marble",
        "one object",
        "selected at random",
        "picked at random",
    )
    container_markers = (
        "box contains",
        "bag contains",
        "urn contains",
        "deck",
        "balls",
        "cards",
        "marbles",
        "dice",
        "coin",
    )

    # NOTE(review): this branch is checked before "at least", so a container
    # question that also says "at least" gets the single-draw guidance.
    # Ordering kept as in the original -- confirm that this is intended.
    mentions_single_draw = any(marker in low for marker in single_draw_markers)
    mentions_container = "probability" in low and any(marker in low for marker in container_markers)
    if mentions_single_draw or mentions_container:
        lines = [
            "- Start by deciding what counts as a successful outcome in this question.",
            "- Then count the total number of possible outcomes in the container or sample space.",
            "- Set up the probability as favorable outcomes over total outcomes before comparing the answer choices.",
        ]
        if options_text:
            lines.append("- Use that fraction to match the answer choices instead of doing extra work.")
        return "\n".join(lines)

    if "at least" in low:
        return (
            "- Start by deciding whether the complement is easier than counting the requested cases directly.\n"
            "- For an 'at least' question, it is often simpler to find the probability of the opposite event first.\n"
            "- Then subtract that result from 1 at the end."
        )

    # Connectives must match as whole words: a plain substring test finds
    # "and" inside "random" (and "then" inside "strengthen"), which made every
    # question mentioning "random" look like a multi-step event.
    has_connective = re.search(r"\b(?:and|both|then|after)\b", low) is not None
    has_probability_wording = any(k in low for k in ("probability", "chosen", "random"))
    if has_connective and has_probability_wording:
        return (
            "- First identify whether the events happen together or separately.\n"
            "- Then decide whether you should multiply probabilities, add them, or use the complement.\n"
            "- Keep track of whether the total outcomes change after each step."
        )

    return (
        "- Start by identifying the favorable outcomes and the total possible outcomes.\n"
        "- Then build the probability as favorable over total before simplifying or matching an answer choice."
    )
|
| 707 |
+
|
| 708 |
def _build_question_specific_reply(
|
| 709 |
*,
|
| 710 |
question_text: str,
|
|
|
|
| 722 |
if not q:
|
| 723 |
return ""
|
| 724 |
|
| 725 |
+
explicit_help_ask = (
|
| 726 |
+
input_type in {"hint", "next_hint", "confusion"}
|
| 727 |
+
or any(
|
| 728 |
+
phrase in user_low
|
| 729 |
+
for phrase in [
|
| 730 |
+
"how do i solve",
|
| 731 |
+
"how do i do this",
|
| 732 |
+
"what do i do first",
|
| 733 |
+
"what should i do first",
|
| 734 |
+
"how should i start",
|
| 735 |
+
"where do i start",
|
| 736 |
+
"first step",
|
| 737 |
+
]
|
| 738 |
+
)
|
| 739 |
+
)
|
| 740 |
+
|
| 741 |
if any(k in low for k in ["variability", "spread", "standard deviation"]):
|
| 742 |
return _question_specific_variability_reply(options_text)
|
| 743 |
|
| 744 |
+
if topic == "probability" or any(
|
| 745 |
+
k in low for k in ["probability", "chance", "odds", "at random", "chosen at random"]
|
| 746 |
+
):
|
| 747 |
+
return _question_specific_probability_reply(q, options_text)
|
| 748 |
+
|
| 749 |
if topic in {"ratio", "algebra"}:
|
| 750 |
if re.search(r"\b[a-z]\s*/\s*[a-z]\s*=\s*\d+\s*/\s*\d+", low):
|
| 751 |
return _question_specific_ratio_reply(q)
|
|
|
|
| 771 |
"- Then focus on the exact expression the question asks for, rather than solving more than you need to."
|
| 772 |
)
|
| 773 |
|
| 774 |
+
if explicit_help_ask:
|
| 775 |
+
if topic == "percent":
|
| 776 |
+
return _question_specific_percent_reply(q)
|
| 777 |
+
if topic == "probability":
|
| 778 |
+
return _question_specific_probability_reply(q, options_text)
|
| 779 |
+
return "- Start by identifying the main relationship in the question, then use that relationship to set up the first step."
|
| 780 |
|
| 781 |
return ""
|
| 782 |
|
|
|
|
| 1033 |
|
| 1034 |
if input_type == "topic_query":
|
| 1035 |
support_topic = fallback_pack.get("topic") if fallback_pack else ""
|
| 1036 |
+
final_topic = _specific_topic_from_question(
|
| 1037 |
+
solver_input,
|
| 1038 |
+
support_topic,
|
| 1039 |
+
question_topic if question_topic else "general",
|
| 1040 |
+
) or support_topic or question_topic or "general"
|
| 1041 |
topic_reply_core = _build_topic_query_reply(
|
| 1042 |
question_text=solver_input,
|
| 1043 |
fallback_topic=support_topic,
|
|
|
|
| 1051 |
transparency=transparency,
|
| 1052 |
help_mode="answer",
|
| 1053 |
hint_stage=hint_stage,
|
| 1054 |
+
topic=final_topic,
|
| 1055 |
)
|
| 1056 |
+
result.topic = final_topic
|
| 1057 |
result.reply = reply
|
| 1058 |
result.help_mode = "answer"
|
| 1059 |
result.meta["response_source"] = "topic_classifier"
|
|
|
|
| 1091 |
|
| 1092 |
if fallback_pack and fallback_pack.get("topic") == "statistics":
|
| 1093 |
qlow = (solver_input or "").lower()
|
|
|
|
| 1094 |
if any(k in qlow for k in ["variability", "spread", "standard deviation"]):
|
| 1095 |
+
if resolved_help_mode == "answer":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1096 |
fallback_reply_core = (
|
| 1097 |
"- Notice this is asking about variability, which means spread, not the mean.\n"
|
| 1098 |
"- Compare how far the smallest and largest values sit from the middle value in each dataset.\n"
|