Upload 5 files
Browse files- conversation_logic.py +24 -207
- formatting.py +75 -123
- question_fallback_router.py +387 -364
- question_support_loader.py +109 -77
- solver_router.py +3 -18
conversation_logic.py
CHANGED
|
@@ -1050,19 +1050,26 @@ def _should_try_solver(is_quant: bool, help_mode: str, solver_input: str) -> boo
|
|
| 1050 |
return help_mode in {"answer", "walkthrough", "instruction", "hint", "step_by_step"}
|
| 1051 |
|
| 1052 |
|
| 1053 |
-
def
|
| 1054 |
if not fallback_pack:
|
| 1055 |
return False
|
| 1056 |
support_source = str(fallback_pack.get("support_source", "")).strip().lower()
|
| 1057 |
-
|
| 1058 |
-
|
| 1059 |
-
|
| 1060 |
-
|
| 1061 |
-
|
| 1062 |
-
|
| 1063 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1064 |
if help_mode in {"hint", "walkthrough", "instruction", "step_by_step", "explain", "method"}:
|
| 1065 |
-
return
|
| 1066 |
return False
|
| 1067 |
|
| 1068 |
|
|
@@ -1124,208 +1131,17 @@ class ConversationEngine:
|
|
| 1124 |
question_topic = _normalize_classified_topic(classification.get("topic"), inferred_category, solver_input)
|
| 1125 |
|
| 1126 |
resolved_intent = intent or detect_intent(user_text, help_mode)
|
| 1127 |
-
if input_type in {"hint", "next_hint"}:
|
| 1128 |
-
resolved_intent = "hint"
|
| 1129 |
-
elif input_type == "confusion":
|
| 1130 |
-
resolved_intent = "method"
|
| 1131 |
-
elif input_type in {"solve", "question"} and resolved_intent in {"hint", "walkthrough", "step_by_step"}:
|
| 1132 |
-
resolved_intent = "answer"
|
| 1133 |
-
|
| 1134 |
-
resolved_help_mode = help_mode or intent_to_help_mode(resolved_intent)
|
| 1135 |
-
if input_type in {"hint", "next_hint"}:
|
| 1136 |
-
resolved_help_mode = "hint"
|
| 1137 |
-
elif input_type == "confusion":
|
| 1138 |
-
resolved_help_mode = "explain"
|
| 1139 |
-
elif resolved_help_mode == "step_by_step":
|
| 1140 |
-
resolved_help_mode = "walkthrough"
|
| 1141 |
-
|
| 1142 |
-
prior_hint_stage = int(state.get("hint_stage", 0) or 0)
|
| 1143 |
-
history_hint_stage = _history_hint_stage(chat_history)
|
| 1144 |
-
hint_stage = _compute_hint_stage(input_type, prior_hint_stage, history_hint_stage)
|
| 1145 |
-
|
| 1146 |
-
is_quant = bool(solver_input) and (
|
| 1147 |
-
inferred_category == "Quantitative" or is_quant_question(solver_input)
|
| 1148 |
-
)
|
| 1149 |
-
|
| 1150 |
-
result = SolverResult(
|
| 1151 |
-
domain="quant" if is_quant else "general",
|
| 1152 |
-
solved=False,
|
| 1153 |
-
help_mode=resolved_help_mode,
|
| 1154 |
-
topic=question_topic if is_quant else "general",
|
| 1155 |
-
used_retrieval=False,
|
| 1156 |
-
used_generator=False,
|
| 1157 |
-
steps=[],
|
| 1158 |
-
teaching_chunks=[],
|
| 1159 |
-
meta={},
|
| 1160 |
-
)
|
| 1161 |
-
|
| 1162 |
-
solver_result: Optional[SolverResult] = None
|
| 1163 |
-
if _should_try_solver(is_quant, resolved_help_mode, solver_input):
|
| 1164 |
-
try:
|
| 1165 |
-
solver_result = route_solver(solver_input)
|
| 1166 |
-
except Exception:
|
| 1167 |
-
solver_result = None
|
| 1168 |
-
_apply_safe_step_sanitization(solver_result)
|
| 1169 |
-
|
| 1170 |
-
explainer_result = None
|
| 1171 |
-
explainer_understood = False
|
| 1172 |
-
explainer_scaffold: Dict[str, Any] = {}
|
| 1173 |
-
if solver_input:
|
| 1174 |
-
try:
|
| 1175 |
-
explainer_result = route_explainer(solver_input)
|
| 1176 |
-
except Exception:
|
| 1177 |
-
explainer_result = None
|
| 1178 |
-
if explainer_result is not None and getattr(explainer_result, "understood", False):
|
| 1179 |
-
explainer_understood = True
|
| 1180 |
-
explainer_scaffold = _extract_explainer_scaffold(explainer_result)
|
| 1181 |
-
|
| 1182 |
-
fallback_reply_core = ""
|
| 1183 |
-
fallback_pack: Dict[str, Any] = {}
|
| 1184 |
-
if solver_input:
|
| 1185 |
-
fallback_reply_core, fallback_pack = _build_fallback_reply(
|
| 1186 |
-
question_id=question_id,
|
| 1187 |
-
question_text=solver_input,
|
| 1188 |
-
options_text=options_text,
|
| 1189 |
-
topic=question_topic,
|
| 1190 |
-
category=inferred_category,
|
| 1191 |
-
help_mode=resolved_help_mode,
|
| 1192 |
-
hint_stage=hint_stage,
|
| 1193 |
-
verbosity=verbosity,
|
| 1194 |
-
)
|
| 1195 |
-
question_specific_reply_core = _build_question_specific_reply(
|
| 1196 |
-
question_text=solver_input,
|
| 1197 |
-
options_text=options_text,
|
| 1198 |
-
classified_topic=question_topic,
|
| 1199 |
-
help_mode=resolved_help_mode,
|
| 1200 |
-
input_type=input_type,
|
| 1201 |
-
user_text=user_text,
|
| 1202 |
-
)
|
| 1203 |
-
|
| 1204 |
-
if solver_result is not None:
|
| 1205 |
-
result.meta = result.meta or {}
|
| 1206 |
-
solver_topic = getattr(solver_result, "topic", None) or "unknown"
|
| 1207 |
-
|
| 1208 |
-
compatible_topics = {
|
| 1209 |
-
question_topic,
|
| 1210 |
-
"general_quant",
|
| 1211 |
-
"general",
|
| 1212 |
-
"unknown",
|
| 1213 |
-
}
|
| 1214 |
-
|
| 1215 |
-
if question_topic == "algebra":
|
| 1216 |
-
compatible_topics.update({"ratio"})
|
| 1217 |
-
elif question_topic == "ratio":
|
| 1218 |
-
compatible_topics.update({"algebra"})
|
| 1219 |
-
elif question_topic == "percent":
|
| 1220 |
-
compatible_topics.update({"ratio", "algebra"})
|
| 1221 |
-
|
| 1222 |
-
if solver_topic in compatible_topics:
|
| 1223 |
-
result = solver_result
|
| 1224 |
-
result.domain = "quant"
|
| 1225 |
-
result.meta = result.meta or {}
|
| 1226 |
-
result.topic = question_topic if question_topic else solver_topic
|
| 1227 |
-
result.meta["solver_topic_accepted"] = solver_topic
|
| 1228 |
-
else:
|
| 1229 |
-
result.meta["solver_topic_rejected"] = solver_topic
|
| 1230 |
-
result.meta["solver_topic_expected"] = question_topic
|
| 1231 |
-
result.topic = question_topic if is_quant else result.topic
|
| 1232 |
-
else:
|
| 1233 |
-
result.meta = result.meta or {}
|
| 1234 |
-
result.topic = question_topic if is_quant else result.topic
|
| 1235 |
-
|
| 1236 |
-
_apply_safe_step_sanitization(result)
|
| 1237 |
-
solver_steps = _get_result_steps(result)
|
| 1238 |
-
solver_has_steps = bool(solver_steps)
|
| 1239 |
-
prefer_question_support = _should_prefer_question_support(resolved_help_mode, fallback_pack)
|
| 1240 |
-
direct_solve_request = _is_direct_solve_request(user_text or solver_input, resolved_intent)
|
| 1241 |
-
solver_topic_ok = result.meta.get("solver_topic_rejected") is None
|
| 1242 |
-
|
| 1243 |
-
result.help_mode = resolved_help_mode
|
| 1244 |
-
result.meta = result.meta or {}
|
| 1245 |
-
result.meta["hint_stage"] = hint_stage
|
| 1246 |
-
result.meta["max_stage"] = 4
|
| 1247 |
-
result.meta["recovered_question_text"] = solver_input
|
| 1248 |
-
result.meta["question_id"] = question_id
|
| 1249 |
-
result.meta["classified_topic"] = question_topic if question_topic else "general"
|
| 1250 |
-
result.meta["explainer_understood"] = explainer_understood
|
| 1251 |
-
result.meta["explainer_scaffold"] = explainer_scaffold
|
| 1252 |
-
|
| 1253 |
-
if input_type == "topic_query":
|
| 1254 |
-
support_topic = fallback_pack.get("topic") if fallback_pack else ""
|
| 1255 |
-
final_topic = _specific_topic_from_question(
|
| 1256 |
-
solver_input,
|
| 1257 |
-
support_topic,
|
| 1258 |
-
question_topic if question_topic else "general",
|
| 1259 |
-
) or support_topic or question_topic or "general"
|
| 1260 |
-
topic_reply_core = _build_topic_query_reply(
|
| 1261 |
-
question_text=solver_input,
|
| 1262 |
-
fallback_topic=support_topic,
|
| 1263 |
-
classified_topic=question_topic if question_topic else "general",
|
| 1264 |
-
category=inferred_category if inferred_category else "General",
|
| 1265 |
-
)
|
| 1266 |
-
reply = format_reply(
|
| 1267 |
-
topic_reply_core,
|
| 1268 |
-
tone=tone,
|
| 1269 |
-
verbosity=verbosity,
|
| 1270 |
-
transparency=transparency,
|
| 1271 |
-
help_mode="answer",
|
| 1272 |
-
hint_stage=hint_stage,
|
| 1273 |
-
topic=final_topic,
|
| 1274 |
-
)
|
| 1275 |
-
result.topic = final_topic
|
| 1276 |
-
result.reply = reply
|
| 1277 |
-
result.help_mode = "answer"
|
| 1278 |
-
result.meta["response_source"] = "topic_classifier"
|
| 1279 |
-
result.meta["question_support_used"] = bool(fallback_pack)
|
| 1280 |
-
result.meta["question_support_source"] = fallback_pack.get("support_source") if fallback_pack else None
|
| 1281 |
-
result.meta["question_support_topic"] = support_topic or None
|
| 1282 |
-
result.meta["help_mode"] = "answer"
|
| 1283 |
-
result.meta["intent"] = "topic_query"
|
| 1284 |
-
result.meta["question_text"] = solver_input or ""
|
| 1285 |
-
result.meta["options_count"] = len(options_text or [])
|
| 1286 |
-
result.meta["category"] = inferred_category if inferred_category else "General"
|
| 1287 |
-
result.meta["user_last_input_type"] = input_type
|
| 1288 |
-
result.meta["built_on_previous_turn"] = built_on_previous_turn
|
| 1289 |
-
state = _update_session_state(
|
| 1290 |
-
state,
|
| 1291 |
-
question_text=solver_input,
|
| 1292 |
-
question_id=question_id,
|
| 1293 |
-
hint_stage=hint_stage,
|
| 1294 |
-
user_last_input_type=input_type,
|
| 1295 |
-
built_on_previous_turn=built_on_previous_turn,
|
| 1296 |
-
help_mode="answer",
|
| 1297 |
-
intent="topic_query",
|
| 1298 |
-
topic=result.topic,
|
| 1299 |
-
category=inferred_category,
|
| 1300 |
-
)
|
| 1301 |
-
result.meta["session_state"] = state
|
| 1302 |
-
result.meta["used_retrieval"] = False
|
| 1303 |
-
result.meta["used_generator"] = False
|
| 1304 |
-
result.meta["can_reveal_answer"] = False
|
| 1305 |
-
result.answer_letter = None
|
| 1306 |
-
result.answer_value = None
|
| 1307 |
-
result.internal_answer = None
|
| 1308 |
-
result.meta["internal_answer"] = None
|
| 1309 |
-
return result
|
| 1310 |
-
|
| 1311 |
-
if fallback_pack and fallback_pack.get("topic") == "statistics":
|
| 1312 |
-
qlow = (solver_input or "").lower()
|
| 1313 |
-
if any(k in qlow for k in ["variability", "spread", "standard deviation"]):
|
| 1314 |
-
if resolved_help_mode == "answer":
|
| 1315 |
-
fallback_reply_core = (
|
| 1316 |
-
"- Notice this is asking about variability, which means spread, not the mean.\n"
|
| 1317 |
-
"- Compare how far the smallest and largest values sit from the middle value in each dataset.\n"
|
| 1318 |
-
"- The set with the widest spread has the greatest variability."
|
| 1319 |
-
)
|
| 1320 |
-
|
| 1321 |
if input_type in {"hint", "next_hint"}:
|
| 1322 |
hint_lines: List[str] = []
|
|
|
|
| 1323 |
|
| 1324 |
if fallback_pack:
|
| 1325 |
fallback_hints = _safe_meta_list(fallback_pack.get("hint_ladder", []))
|
| 1326 |
if fallback_hints:
|
| 1327 |
idx = min(max(hint_stage - 1, 0), len(fallback_hints) - 1)
|
| 1328 |
hint_lines = [fallback_hints[idx]]
|
|
|
|
|
|
|
| 1329 |
|
| 1330 |
if not hint_lines:
|
| 1331 |
custom_ladder = _question_specific_hint_ladder(
|
|
@@ -1336,12 +1152,13 @@ class ConversationEngine:
|
|
| 1336 |
if custom_ladder:
|
| 1337 |
idx = min(max(hint_stage - 1, 0), len(custom_ladder) - 1)
|
| 1338 |
hint_lines = [custom_ladder[idx]]
|
|
|
|
|
|
|
| 1339 |
|
| 1340 |
if not hint_lines and explainer_scaffold:
|
| 1341 |
ladder = _safe_meta_list(explainer_scaffold.get("hint_ladder", []))
|
| 1342 |
first_move = _safe_meta_text(explainer_scaffold.get("first_move"))
|
| 1343 |
next_hint_text = _safe_meta_text(explainer_scaffold.get("next_hint"))
|
| 1344 |
-
|
| 1345 |
if hint_stage <= 1 and first_move:
|
| 1346 |
hint_lines = [first_move]
|
| 1347 |
elif ladder:
|
|
@@ -1360,7 +1177,7 @@ class ConversationEngine:
|
|
| 1360 |
hint_lines = [_minimal_generic_reply(inferred_category)]
|
| 1361 |
|
| 1362 |
reply_core = "\n".join(f"- {line}" for line in hint_lines if str(line).strip())
|
| 1363 |
-
result.meta["response_source"] = "hint_ladder"
|
| 1364 |
result.meta["question_support_used"] = bool(fallback_pack)
|
| 1365 |
result.meta["question_support_source"] = fallback_pack.get("support_source") if fallback_pack else None
|
| 1366 |
result.meta["question_support_topic"] = fallback_pack.get("topic") if fallback_pack else None
|
|
@@ -1377,7 +1194,7 @@ class ConversationEngine:
|
|
| 1377 |
|
| 1378 |
elif question_specific_reply_core and (
|
| 1379 |
input_type not in {"hint", "next_hint"}
|
| 1380 |
-
and not (
|
| 1381 |
and (
|
| 1382 |
_is_help_first_mode(resolved_help_mode)
|
| 1383 |
or input_type in {"other", "confusion"}
|
|
@@ -1488,7 +1305,7 @@ class ConversationEngine:
|
|
| 1488 |
)
|
| 1489 |
elif fallback_reply_core:
|
| 1490 |
reply_core = fallback_reply_core
|
| 1491 |
-
result.meta["response_source"] = "fallback"
|
| 1492 |
result.meta["question_support_used"] = bool(fallback_pack)
|
| 1493 |
result.meta["question_support_source"] = fallback_pack.get("support_source")
|
| 1494 |
result.meta["question_support_topic"] = fallback_pack.get("topic")
|
|
|
|
| 1050 |
return help_mode in {"answer", "walkthrough", "instruction", "hint", "step_by_step"}
|
| 1051 |
|
| 1052 |
|
| 1053 |
+
def _support_pack_is_strong(fallback_pack: Dict[str, Any]) -> bool:
|
| 1054 |
if not fallback_pack:
|
| 1055 |
return False
|
| 1056 |
support_source = str(fallback_pack.get("support_source", "")).strip().lower()
|
| 1057 |
+
support_match = fallback_pack.get("support_match") or {}
|
| 1058 |
+
match_mode = str(support_match.get("mode", "")).strip().lower()
|
| 1059 |
+
if support_source in {"question_bank", "question_bank_refined"}:
|
| 1060 |
+
return True
|
| 1061 |
+
if match_mode in {"question_id", "signature_exact", "text_exact", "signature_unordered", "fuzzy"}:
|
| 1062 |
+
return True
|
| 1063 |
+
if support_source == "generated_question_specific":
|
| 1064 |
+
return bool(fallback_pack.get("topic") and _safe_meta_list(fallback_pack.get("hint_ladder", [])))
|
| 1065 |
+
return bool(fallback_pack)
|
| 1066 |
+
|
| 1067 |
+
|
| 1068 |
+
def _should_prefer_question_support(help_mode: str, fallback_pack: Dict[str, Any]) -> bool:
|
| 1069 |
+
if not fallback_pack:
|
| 1070 |
+
return False
|
| 1071 |
if help_mode in {"hint", "walkthrough", "instruction", "step_by_step", "explain", "method"}:
|
| 1072 |
+
return _support_pack_is_strong(fallback_pack)
|
| 1073 |
return False
|
| 1074 |
|
| 1075 |
|
|
|
|
| 1131 |
question_topic = _normalize_classified_topic(classification.get("topic"), inferred_category, solver_input)
|
| 1132 |
|
| 1133 |
resolved_intent = intent or detect_intent(user_text, help_mode)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1134 |
if input_type in {"hint", "next_hint"}:
|
| 1135 |
hint_lines: List[str] = []
|
| 1136 |
+
support_is_strong = _support_pack_is_strong(fallback_pack)
|
| 1137 |
|
| 1138 |
if fallback_pack:
|
| 1139 |
fallback_hints = _safe_meta_list(fallback_pack.get("hint_ladder", []))
|
| 1140 |
if fallback_hints:
|
| 1141 |
idx = min(max(hint_stage - 1, 0), len(fallback_hints) - 1)
|
| 1142 |
hint_lines = [fallback_hints[idx]]
|
| 1143 |
+
if verbosity >= 0.62 and idx + 1 < len(fallback_hints):
|
| 1144 |
+
hint_lines.append(fallback_hints[idx + 1])
|
| 1145 |
|
| 1146 |
if not hint_lines:
|
| 1147 |
custom_ladder = _question_specific_hint_ladder(
|
|
|
|
| 1152 |
if custom_ladder:
|
| 1153 |
idx = min(max(hint_stage - 1, 0), len(custom_ladder) - 1)
|
| 1154 |
hint_lines = [custom_ladder[idx]]
|
| 1155 |
+
if verbosity >= 0.62 and idx + 1 < len(custom_ladder):
|
| 1156 |
+
hint_lines.append(custom_ladder[idx + 1])
|
| 1157 |
|
| 1158 |
if not hint_lines and explainer_scaffold:
|
| 1159 |
ladder = _safe_meta_list(explainer_scaffold.get("hint_ladder", []))
|
| 1160 |
first_move = _safe_meta_text(explainer_scaffold.get("first_move"))
|
| 1161 |
next_hint_text = _safe_meta_text(explainer_scaffold.get("next_hint"))
|
|
|
|
| 1162 |
if hint_stage <= 1 and first_move:
|
| 1163 |
hint_lines = [first_move]
|
| 1164 |
elif ladder:
|
|
|
|
| 1177 |
hint_lines = [_minimal_generic_reply(inferred_category)]
|
| 1178 |
|
| 1179 |
reply_core = "\n".join(f"- {line}" for line in hint_lines if str(line).strip())
|
| 1180 |
+
result.meta["response_source"] = "hint_ladder" if support_is_strong else "hint_router"
|
| 1181 |
result.meta["question_support_used"] = bool(fallback_pack)
|
| 1182 |
result.meta["question_support_source"] = fallback_pack.get("support_source") if fallback_pack else None
|
| 1183 |
result.meta["question_support_topic"] = fallback_pack.get("topic") if fallback_pack else None
|
|
|
|
| 1194 |
|
| 1195 |
elif question_specific_reply_core and (
|
| 1196 |
input_type not in {"hint", "next_hint"}
|
| 1197 |
+
and not (_support_pack_is_strong(fallback_pack) and fallback_reply_core)
|
| 1198 |
and (
|
| 1199 |
_is_help_first_mode(resolved_help_mode)
|
| 1200 |
or input_type in {"other", "confusion"}
|
|
|
|
| 1305 |
)
|
| 1306 |
elif fallback_reply_core:
|
| 1307 |
reply_core = fallback_reply_core
|
| 1308 |
+
result.meta["response_source"] = "question_support" if _support_pack_is_strong(fallback_pack) else "fallback"
|
| 1309 |
result.meta["question_support_used"] = bool(fallback_pack)
|
| 1310 |
result.meta["question_support_source"] = fallback_pack.get("support_source")
|
| 1311 |
result.meta["question_support_topic"] = fallback_pack.get("topic")
|
formatting.py
CHANGED
|
@@ -19,143 +19,120 @@ def _clean_lines(core: str) -> List[str]:
|
|
| 19 |
for line in (core or "").splitlines():
|
| 20 |
cleaned = line.strip()
|
| 21 |
if cleaned:
|
| 22 |
-
lines.append(cleaned)
|
| 23 |
return lines
|
| 24 |
|
| 25 |
|
| 26 |
def _normalize_key(text: str) -> str:
|
| 27 |
-
|
| 28 |
-
text = text.replace("’", "'")
|
| 29 |
-
text = re.sub(r"\s+", " ", text)
|
| 30 |
-
return text
|
| 31 |
|
| 32 |
|
| 33 |
def _dedupe_lines(lines: List[str]) -> List[str]:
|
| 34 |
seen = set()
|
| 35 |
-
|
| 36 |
for line in lines:
|
| 37 |
key = _normalize_key(line)
|
| 38 |
if key and key not in seen:
|
| 39 |
seen.add(key)
|
| 40 |
-
|
| 41 |
-
return
|
| 42 |
|
| 43 |
|
| 44 |
-
def
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
|
| 48 |
def _normalize_display_lines(lines: List[str]) -> List[str]:
|
| 49 |
-
|
| 50 |
-
for line in lines:
|
| 51 |
-
item = _strip_bullet_prefix(line)
|
| 52 |
-
if item:
|
| 53 |
-
cleaned.append(item)
|
| 54 |
-
return cleaned
|
| 55 |
|
| 56 |
|
| 57 |
-
def _limit_steps(
|
| 58 |
-
if not
|
| 59 |
return []
|
| 60 |
-
if verbosity < 0.
|
| 61 |
limit = minimum
|
| 62 |
-
elif verbosity < 0.
|
| 63 |
-
limit = max(minimum, 2)
|
| 64 |
-
elif verbosity < 0.
|
| 65 |
-
limit = max(minimum,
|
| 66 |
else:
|
| 67 |
-
limit =
|
| 68 |
-
return
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
def _extract_topic_from_text(text: str, fallback: Optional[str] = None) -> str:
|
| 72 |
-
low = (text or "").lower()
|
| 73 |
-
if fallback:
|
| 74 |
-
return fallback
|
| 75 |
-
if any(word in low for word in ["equation", "variable", "isolate", "algebra"]):
|
| 76 |
-
return "algebra"
|
| 77 |
-
if any(word in low for word in ["percent", "percentage", "%"]):
|
| 78 |
-
return "percent"
|
| 79 |
-
if any(word in low for word in ["ratio", "proportion"]):
|
| 80 |
-
return "ratio"
|
| 81 |
-
if any(word in low for word in ["probability", "outcome", "chance", "odds"]):
|
| 82 |
-
return "probability"
|
| 83 |
-
if any(word in low for word in ["mean", "median", "average", "data", "variance", "standard deviation"]):
|
| 84 |
-
return "statistics"
|
| 85 |
-
if any(word in low for word in ["triangle", "circle", "angle", "area", "perimeter", "circumference", "rectangle"]):
|
| 86 |
-
return "geometry"
|
| 87 |
-
if any(word in low for word in ["integer", "factor", "multiple", "prime", "remainder", "divisible"]):
|
| 88 |
-
return "number_theory"
|
| 89 |
-
return "general"
|
| 90 |
-
|
| 91 |
|
| 92 |
-
def _why_line(topic: Optional[str]) -> str:
|
| 93 |
-
topic = (topic or "general").strip().lower()
|
| 94 |
|
|
|
|
| 95 |
if topic == "algebra":
|
| 96 |
-
return "Why this helps:
|
| 97 |
if topic == "percent":
|
| 98 |
-
return "Why this helps: percent
|
| 99 |
if topic == "ratio":
|
| 100 |
-
return "Why this helps:
|
| 101 |
if topic == "probability":
|
| 102 |
-
return "Why this helps:
|
| 103 |
if topic == "statistics":
|
| 104 |
-
return "Why this helps: statistics questions depend on choosing the right measure before
|
| 105 |
if topic == "geometry":
|
| 106 |
-
return "Why this helps:
|
| 107 |
-
|
| 108 |
-
return "Why this helps: number theory questions usually depend on patterns, divisibility, or factor structure rather than brute force."
|
| 109 |
-
return "Why this helps: identifying the structure first makes the next step clearer and reduces avoidable mistakes."
|
| 110 |
|
| 111 |
|
| 112 |
def _tone_rewrite(line: str, tone: float, position: int = 0) -> str:
|
| 113 |
text = (line or "").strip()
|
| 114 |
if not text:
|
| 115 |
return text
|
| 116 |
-
|
| 117 |
-
if tone < 0.2:
|
| 118 |
return text
|
| 119 |
-
if tone < 0.
|
| 120 |
-
return text[:1].upper() + text[1:] if text else text
|
| 121 |
-
if tone < 0.75:
|
| 122 |
return f"Start here: {text[0].lower() + text[1:] if len(text) > 1 else text.lower()}" if position == 0 else text
|
| 123 |
-
|
|
|
|
|
|
|
| 124 |
|
| 125 |
|
| 126 |
def _transparency_expansion(line: str, topic: str, transparency: float, position: int = 0) -> str:
|
| 127 |
text = (line or "").strip()
|
| 128 |
-
if not text:
|
| 129 |
-
return text
|
| 130 |
-
|
| 131 |
-
if transparency < 0.35:
|
| 132 |
return text
|
| 133 |
if transparency < 0.7:
|
| 134 |
if position == 0:
|
| 135 |
if topic == "algebra":
|
| 136 |
-
return f"{text} This
|
| 137 |
if topic == "percent":
|
| 138 |
-
return f"{text} This keeps
|
| 139 |
if topic == "ratio":
|
| 140 |
-
return f"{text} This
|
| 141 |
if topic == "probability":
|
| 142 |
-
return f"{text} This
|
| 143 |
return text
|
| 144 |
-
|
| 145 |
if position == 0:
|
| 146 |
if topic == "algebra":
|
| 147 |
-
return f"{text} In algebra,
|
| 148 |
if topic == "percent":
|
| 149 |
-
return f"{text} Percent
|
| 150 |
if topic == "ratio":
|
| 151 |
-
return f"{text} Ratio
|
| 152 |
if topic == "probability":
|
| 153 |
-
return f"{text} Probability depends on
|
| 154 |
if topic == "statistics":
|
| 155 |
-
return f"{text} Statistics questions
|
| 156 |
if topic == "geometry":
|
| 157 |
-
return f"{text} Geometry problems
|
| 158 |
-
return f"{text} This
|
| 159 |
return text
|
| 160 |
|
| 161 |
|
|
@@ -179,7 +156,6 @@ def format_reply(
|
|
| 179 |
) -> str:
|
| 180 |
prefix = style_prefix(tone)
|
| 181 |
core = (core or "").strip()
|
| 182 |
-
|
| 183 |
if not core:
|
| 184 |
return prefix or "Start with the structure of the problem."
|
| 185 |
|
|
@@ -189,77 +165,58 @@ def format_reply(
|
|
| 189 |
|
| 190 |
resolved_topic = _extract_topic_from_text(core, topic)
|
| 191 |
normalized_lines = _normalize_display_lines(lines)
|
| 192 |
-
|
| 193 |
output: List[str] = []
|
| 194 |
if prefix:
|
| 195 |
-
output.
|
| 196 |
-
output.append("")
|
| 197 |
|
| 198 |
if help_mode == "hint":
|
|
|
|
| 199 |
if verbosity < 0.25:
|
| 200 |
-
idx = max(0, min(int(hint_stage or 1) - 1, len(normalized_lines) - 1))
|
| 201 |
shown = [normalized_lines[idx]]
|
| 202 |
-
elif verbosity < 0.
|
| 203 |
-
|
| 204 |
-
shown = normalized_lines[idx:idx + 2] or [normalized_lines[idx]]
|
| 205 |
else:
|
| 206 |
-
shown = normalized_lines[: min(
|
| 207 |
-
|
| 208 |
shown = _styled_lines(shown, tone, transparency, resolved_topic)
|
| 209 |
output.append("Hint:")
|
| 210 |
-
for line in shown
|
| 211 |
-
output.append(f"- {line}")
|
| 212 |
-
|
| 213 |
if transparency >= 0.75:
|
| 214 |
-
output.
|
| 215 |
-
output.append(_why_line(resolved_topic))
|
| 216 |
return "\n".join(output).strip()
|
| 217 |
|
| 218 |
if help_mode in {"walkthrough", "instruction", "step_by_step"}:
|
| 219 |
shown = _limit_steps(normalized_lines, verbosity, minimum=2 if help_mode == "walkthrough" else 1)
|
| 220 |
shown = _styled_lines(shown, tone, transparency, resolved_topic)
|
| 221 |
output.append("Walkthrough:" if help_mode == "walkthrough" else "Step-by-step path:")
|
| 222 |
-
for line in shown
|
| 223 |
-
output.append(f"- {line}")
|
| 224 |
-
|
| 225 |
if transparency >= 0.7:
|
| 226 |
-
output.
|
| 227 |
-
output.append(_why_line(resolved_topic))
|
| 228 |
return "\n".join(output).strip()
|
| 229 |
|
| 230 |
if help_mode in {"method", "explain", "concept", "definition"}:
|
| 231 |
shown = _limit_steps(normalized_lines, verbosity, minimum=1)
|
| 232 |
shown = _styled_lines(shown, tone, transparency, resolved_topic)
|
| 233 |
output.append("Explanation:")
|
| 234 |
-
for line in shown
|
| 235 |
-
output.append(f"- {line}")
|
| 236 |
-
|
| 237 |
if transparency >= 0.6:
|
| 238 |
-
output.
|
| 239 |
-
output.append(_why_line(resolved_topic))
|
| 240 |
return "\n".join(output).strip()
|
| 241 |
|
| 242 |
if help_mode == "answer":
|
| 243 |
shown = _limit_steps(normalized_lines, verbosity, minimum=2)
|
| 244 |
-
|
|
|
|
| 245 |
output.append("Answer path:")
|
| 246 |
-
for line in shown
|
| 247 |
-
output.append(f"- {line}")
|
| 248 |
-
|
| 249 |
if transparency >= 0.75:
|
| 250 |
-
output.
|
| 251 |
-
output.append(_why_line(resolved_topic))
|
| 252 |
return "\n".join(output).strip()
|
| 253 |
|
| 254 |
shown = _limit_steps(normalized_lines, verbosity, minimum=1)
|
| 255 |
shown = _styled_lines(shown, tone, transparency, resolved_topic)
|
| 256 |
-
for line in shown
|
| 257 |
-
output.append(f"- {line}")
|
| 258 |
-
|
| 259 |
if transparency >= 0.8:
|
| 260 |
-
output.
|
| 261 |
-
output.append(_why_line(resolved_topic))
|
| 262 |
-
|
| 263 |
return "\n".join(output).strip()
|
| 264 |
|
| 265 |
|
|
@@ -273,23 +230,18 @@ def format_explainer_response(
|
|
| 273 |
) -> str:
|
| 274 |
if not result:
|
| 275 |
return "I can help explain what the question is asking, but I need the full wording of the question."
|
| 276 |
-
|
| 277 |
summary = getattr(result, "summary", "") or ""
|
| 278 |
teaching_points = getattr(result, "teaching_points", []) or []
|
| 279 |
-
|
| 280 |
core_lines: List[str] = []
|
| 281 |
if isinstance(summary, str) and summary.strip():
|
| 282 |
core_lines.append(summary.strip())
|
| 283 |
-
|
| 284 |
if isinstance(teaching_points, list):
|
| 285 |
for item in teaching_points:
|
| 286 |
text = str(item).strip()
|
| 287 |
if text:
|
| 288 |
core_lines.append(text)
|
| 289 |
-
|
| 290 |
if not core_lines:
|
| 291 |
core_lines = ["Start by identifying what the question is asking."]
|
| 292 |
-
|
| 293 |
return format_reply(
|
| 294 |
core="\n".join(core_lines),
|
| 295 |
tone=tone,
|
|
@@ -298,4 +250,4 @@ def format_explainer_response(
|
|
| 298 |
help_mode=help_mode,
|
| 299 |
hint_stage=hint_stage,
|
| 300 |
topic=getattr(result, "topic", None),
|
| 301 |
-
)
|
|
|
|
| 19 |
for line in (core or "").splitlines():
|
| 20 |
cleaned = line.strip()
|
| 21 |
if cleaned:
|
| 22 |
+
lines.append(cleaned.lstrip("- ").strip())
|
| 23 |
return lines
|
| 24 |
|
| 25 |
|
| 26 |
def _normalize_key(text: str) -> str:
|
| 27 |
+
return re.sub(r"\s+", " ", (text or "").strip().lower())
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
def _dedupe_lines(lines: List[str]) -> List[str]:
|
| 31 |
seen = set()
|
| 32 |
+
out: List[str] = []
|
| 33 |
for line in lines:
|
| 34 |
key = _normalize_key(line)
|
| 35 |
if key and key not in seen:
|
| 36 |
seen.add(key)
|
| 37 |
+
out.append(line.strip())
|
| 38 |
+
return out
|
| 39 |
|
| 40 |
|
| 41 |
+
def _extract_topic_from_text(core: str, topic: Optional[str]) -> str:
|
| 42 |
+
if topic:
|
| 43 |
+
return str(topic).strip().lower()
|
| 44 |
+
text = (core or "").lower()
|
| 45 |
+
if "probability" in text or "favorable" in text or "sample space" in text:
|
| 46 |
+
return "probability"
|
| 47 |
+
if "percent" in text or "%" in text:
|
| 48 |
+
return "percent"
|
| 49 |
+
if "ratio" in text or "multiplier" in text:
|
| 50 |
+
return "ratio"
|
| 51 |
+
if "variable" in text or "equation" in text:
|
| 52 |
+
return "algebra"
|
| 53 |
+
if "variability" in text or "standard deviation" in text or "spread" in text:
|
| 54 |
+
return "statistics"
|
| 55 |
+
if "rectangle" in text or "perimeter" in text or "area" in text:
|
| 56 |
+
return "geometry"
|
| 57 |
+
return "general"
|
| 58 |
|
| 59 |
|
| 60 |
def _normalize_display_lines(lines: List[str]) -> List[str]:
|
| 61 |
+
return [re.sub(r"\s+", " ", (line or "").strip()) for line in lines if str(line).strip()]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
|
| 64 |
+
def _limit_steps(lines: List[str], verbosity: float, minimum: int = 1) -> List[str]:
|
| 65 |
+
if not lines:
|
| 66 |
return []
|
| 67 |
+
if verbosity < 0.22:
|
| 68 |
limit = minimum
|
| 69 |
+
elif verbosity < 0.55:
|
| 70 |
+
limit = max(minimum, min(2, len(lines)))
|
| 71 |
+
elif verbosity < 0.82:
|
| 72 |
+
limit = max(minimum, min(4, len(lines)))
|
| 73 |
else:
|
| 74 |
+
limit = len(lines)
|
| 75 |
+
return lines[:limit]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
|
|
|
|
|
|
| 77 |
|
| 78 |
+
def _why_line(topic: str) -> str:
|
| 79 |
if topic == "algebra":
|
| 80 |
+
return "Why this helps: reversing operations in the right order keeps the equation equivalent while you isolate the variable."
|
| 81 |
if topic == "percent":
|
| 82 |
+
return "Why this helps: percent questions usually break when the base quantity is chosen incorrectly."
|
| 83 |
if topic == "ratio":
|
| 84 |
+
return "Why this helps: ratio numbers are usually parts, not the final quantities themselves."
|
| 85 |
if topic == "probability":
|
| 86 |
+
return "Why this helps: the numerator and denominator must be counted under the same rules."
|
| 87 |
if topic == "statistics":
|
| 88 |
+
return "Why this helps: statistics questions depend on choosing the right measure before calculating."
|
| 89 |
if topic == "geometry":
|
| 90 |
+
return "Why this helps: matching the right formula to the shape simplifies the rest of the work."
|
| 91 |
+
return "Why this helps: getting the structure right first makes the next step clearer."
|
|
|
|
|
|
|
| 92 |
|
| 93 |
|
| 94 |
def _tone_rewrite(line: str, tone: float, position: int = 0) -> str:
|
| 95 |
text = (line or "").strip()
|
| 96 |
if not text:
|
| 97 |
return text
|
| 98 |
+
if tone < 0.25:
|
|
|
|
| 99 |
return text
|
| 100 |
+
if tone < 0.55:
|
|
|
|
|
|
|
| 101 |
return f"Start here: {text[0].lower() + text[1:] if len(text) > 1 else text.lower()}" if position == 0 else text
|
| 102 |
+
if tone < 0.8:
|
| 103 |
+
return f"A good place to start is this: {text[0].lower() + text[1:] if len(text) > 1 else text.lower()}" if position == 0 else text
|
| 104 |
+
return f"You can start with this: {text[0].lower() + text[1:] if len(text) > 1 else text.lower()}" if position == 0 else text
|
| 105 |
|
| 106 |
|
| 107 |
def _transparency_expansion(line: str, topic: str, transparency: float, position: int = 0) -> str:
|
| 108 |
text = (line or "").strip()
|
| 109 |
+
if not text or transparency < 0.35:
|
|
|
|
|
|
|
|
|
|
| 110 |
return text
|
| 111 |
if transparency < 0.7:
|
| 112 |
if position == 0:
|
| 113 |
if topic == "algebra":
|
| 114 |
+
return f"{text} This keeps the equation balanced while you isolate the variable."
|
| 115 |
if topic == "percent":
|
| 116 |
+
return f"{text} This keeps the percent relationship tied to the correct base quantity."
|
| 117 |
if topic == "ratio":
|
| 118 |
+
return f"{text} This turns the ratio into usable quantities instead of labels."
|
| 119 |
if topic == "probability":
|
| 120 |
+
return f"{text} This separates successful outcomes from total outcomes."
|
| 121 |
return text
|
|
|
|
| 122 |
if position == 0:
|
| 123 |
if topic == "algebra":
|
| 124 |
+
return f"{text} In algebra, each step should preserve an equivalent equation so the solution does not change while the variable is isolated."
|
| 125 |
if topic == "percent":
|
| 126 |
+
return f"{text} Percent problems become clearer once the base quantity is fixed, because every percentage must refer back to some amount."
|
| 127 |
if topic == "ratio":
|
| 128 |
+
return f"{text} Ratio numbers usually describe relative parts, so turning them into multiples of one common quantity is what makes the setup usable."
|
| 129 |
if topic == "probability":
|
| 130 |
+
return f"{text} Probability depends on a consistent sample space, so the numerator and denominator must be counted under the same rules."
|
| 131 |
if topic == "statistics":
|
| 132 |
+
return f"{text} Statistics questions often hinge on choosing the right measure first, because different measures capture different features of the data."
|
| 133 |
if topic == "geometry":
|
| 134 |
+
return f"{text} Geometry problems often become routine once the correct formula is chosen, because the rest is usually substitution and algebra."
|
| 135 |
+
return f"{text} This makes the underlying structure explicit before you calculate."
|
| 136 |
return text
|
| 137 |
|
| 138 |
|
|
|
|
| 156 |
) -> str:
|
| 157 |
prefix = style_prefix(tone)
|
| 158 |
core = (core or "").strip()
|
|
|
|
| 159 |
if not core:
|
| 160 |
return prefix or "Start with the structure of the problem."
|
| 161 |
|
|
|
|
| 165 |
|
| 166 |
resolved_topic = _extract_topic_from_text(core, topic)
|
| 167 |
normalized_lines = _normalize_display_lines(lines)
|
|
|
|
| 168 |
output: List[str] = []
|
| 169 |
if prefix:
|
| 170 |
+
output.extend([prefix, ""])
|
|
|
|
| 171 |
|
| 172 |
if help_mode == "hint":
|
| 173 |
+
idx = max(0, min(int(hint_stage or 1) - 1, len(normalized_lines) - 1))
|
| 174 |
if verbosity < 0.25:
|
|
|
|
| 175 |
shown = [normalized_lines[idx]]
|
| 176 |
+
elif verbosity < 0.62:
|
| 177 |
+
shown = normalized_lines[idx: idx + 2] or [normalized_lines[idx]]
|
|
|
|
| 178 |
else:
|
| 179 |
+
shown = normalized_lines[: min(4, len(normalized_lines))]
|
|
|
|
| 180 |
shown = _styled_lines(shown, tone, transparency, resolved_topic)
|
| 181 |
output.append("Hint:")
|
| 182 |
+
output.extend(f"- {line}" for line in shown)
|
|
|
|
|
|
|
| 183 |
if transparency >= 0.75:
|
| 184 |
+
output.extend(["", _why_line(resolved_topic)])
|
|
|
|
| 185 |
return "\n".join(output).strip()
|
| 186 |
|
| 187 |
if help_mode in {"walkthrough", "instruction", "step_by_step"}:
|
| 188 |
shown = _limit_steps(normalized_lines, verbosity, minimum=2 if help_mode == "walkthrough" else 1)
|
| 189 |
shown = _styled_lines(shown, tone, transparency, resolved_topic)
|
| 190 |
output.append("Walkthrough:" if help_mode == "walkthrough" else "Step-by-step path:")
|
| 191 |
+
output.extend(f"- {line}" for line in shown)
|
|
|
|
|
|
|
| 192 |
if transparency >= 0.7:
|
| 193 |
+
output.extend(["", _why_line(resolved_topic)])
|
|
|
|
| 194 |
return "\n".join(output).strip()
|
| 195 |
|
| 196 |
if help_mode in {"method", "explain", "concept", "definition"}:
|
| 197 |
shown = _limit_steps(normalized_lines, verbosity, minimum=1)
|
| 198 |
shown = _styled_lines(shown, tone, transparency, resolved_topic)
|
| 199 |
output.append("Explanation:")
|
| 200 |
+
output.extend(f"- {line}" for line in shown)
|
|
|
|
|
|
|
| 201 |
if transparency >= 0.6:
|
| 202 |
+
output.extend(["", _why_line(resolved_topic)])
|
|
|
|
| 203 |
return "\n".join(output).strip()
|
| 204 |
|
| 205 |
if help_mode == "answer":
|
| 206 |
shown = _limit_steps(normalized_lines, verbosity, minimum=2)
|
| 207 |
+
answer_transparency = transparency if verbosity >= 0.45 else min(transparency, 0.45)
|
| 208 |
+
shown = _styled_lines(shown, tone, answer_transparency, resolved_topic)
|
| 209 |
output.append("Answer path:")
|
| 210 |
+
output.extend(f"- {line}" for line in shown)
|
|
|
|
|
|
|
| 211 |
if transparency >= 0.75:
|
| 212 |
+
output.extend(["", _why_line(resolved_topic)])
|
|
|
|
| 213 |
return "\n".join(output).strip()
|
| 214 |
|
| 215 |
shown = _limit_steps(normalized_lines, verbosity, minimum=1)
|
| 216 |
shown = _styled_lines(shown, tone, transparency, resolved_topic)
|
| 217 |
+
output.extend(f"- {line}" for line in shown)
|
|
|
|
|
|
|
| 218 |
if transparency >= 0.8:
|
| 219 |
+
output.extend(["", _why_line(resolved_topic)])
|
|
|
|
|
|
|
| 220 |
return "\n".join(output).strip()
|
| 221 |
|
| 222 |
|
|
|
|
| 230 |
) -> str:
|
| 231 |
if not result:
|
| 232 |
return "I can help explain what the question is asking, but I need the full wording of the question."
|
|
|
|
| 233 |
summary = getattr(result, "summary", "") or ""
|
| 234 |
teaching_points = getattr(result, "teaching_points", []) or []
|
|
|
|
| 235 |
core_lines: List[str] = []
|
| 236 |
if isinstance(summary, str) and summary.strip():
|
| 237 |
core_lines.append(summary.strip())
|
|
|
|
| 238 |
if isinstance(teaching_points, list):
|
| 239 |
for item in teaching_points:
|
| 240 |
text = str(item).strip()
|
| 241 |
if text:
|
| 242 |
core_lines.append(text)
|
|
|
|
| 243 |
if not core_lines:
|
| 244 |
core_lines = ["Start by identifying what the question is asking."]
|
|
|
|
| 245 |
return format_reply(
|
| 246 |
core="\n".join(core_lines),
|
| 247 |
tone=tone,
|
|
|
|
| 250 |
help_mode=help_mode,
|
| 251 |
hint_stage=hint_stage,
|
| 252 |
topic=getattr(result, "topic", None),
|
| 253 |
+
)
|
question_fallback_router.py
CHANGED
|
@@ -1,9 +1,25 @@
|
|
|
|
|
|
|
|
| 1 |
import re
|
| 2 |
from typing import Any, Dict, List, Optional
|
| 3 |
|
| 4 |
from question_support_loader import question_support_bank
|
| 5 |
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
class QuestionFallbackRouter:
|
| 8 |
def _clean(self, text: Optional[str]) -> str:
|
| 9 |
return (text or "").strip()
|
|
@@ -16,8 +32,7 @@ class QuestionFallbackRouter:
|
|
| 16 |
if isinstance(value, tuple):
|
| 17 |
return [str(v).strip() for v in value if str(v).strip()]
|
| 18 |
if isinstance(value, str):
|
| 19 |
-
|
| 20 |
-
return [text] if text else []
|
| 21 |
return []
|
| 22 |
|
| 23 |
def _dedupe(self, items: List[str]) -> List[str]:
|
|
@@ -34,338 +49,381 @@ class QuestionFallbackRouter:
|
|
| 34 |
def _normalize_topic(self, topic: Optional[str], question_text: str) -> str:
|
| 35 |
q = (question_text or "").lower()
|
| 36 |
t = (topic or "").strip().lower()
|
| 37 |
-
|
| 38 |
if t and t not in {"general", "unknown", "general_quant", "quant"}:
|
| 39 |
return t
|
| 40 |
-
if t == "quant":
|
| 41 |
-
t = ""
|
| 42 |
if "%" in q or "percent" in q:
|
| 43 |
return "percent"
|
| 44 |
-
if "
|
| 45 |
-
return "ratio"
|
| 46 |
-
if any(k in q for k in ["probability", "odds", "chance", "random"]):
|
| 47 |
return "probability"
|
| 48 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
return "number_theory"
|
| 50 |
-
if any(k in q for k in ["
|
| 51 |
return "geometry"
|
| 52 |
-
if
|
| 53 |
-
return "statistics"
|
| 54 |
-
if "=" in q or re.search(r"\b[xyzabn]\b", q):
|
| 55 |
return "algebra"
|
| 56 |
return "general"
|
| 57 |
|
| 58 |
-
def _preview_question(self, question_text: str) -> str:
|
| 59 |
-
cleaned = " ".join((question_text or "").split())
|
| 60 |
-
if len(cleaned) <= 120:
|
| 61 |
-
return cleaned
|
| 62 |
-
return cleaned[:117].rstrip() + "..."
|
| 63 |
-
|
| 64 |
def _extract_equation(self, question_text: str) -> Optional[str]:
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
if m:
|
| 68 |
-
eq = self._clean(m.group(1))
|
| 69 |
-
return eq or None
|
| 70 |
-
return None
|
| 71 |
|
| 72 |
def _extract_ratio(self, question_text: str) -> Optional[str]:
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
def _extract_percent_values(self, question_text: str) -> List[str]:
|
| 80 |
-
return re.findall(r"\d+\.?\d*\s*%", question_text or "")
|
| 81 |
-
|
| 82 |
-
def _looks_like_linear_equation(self, question_text: str) -> bool:
|
| 83 |
-
q = self._clean(question_text)
|
| 84 |
-
low = q.lower()
|
| 85 |
-
return bool(
|
| 86 |
-
"=" in q
|
| 87 |
-
and re.search(r"\bwhat is\s+[a-z]\b", low)
|
| 88 |
-
and re.search(r"\d+[a-z]\b|\b[a-z]\b", q)
|
| 89 |
-
)
|
| 90 |
|
| 91 |
def _pack_looks_generic(self, pack: Dict[str, Any], topic: str) -> bool:
|
| 92 |
if not pack:
|
| 93 |
return True
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
]
|
| 103 |
-
).lower()
|
| 104 |
-
generic_signals = [
|
| 105 |
-
"write the equation clearly and identify the variable",
|
| 106 |
-
"undo operations in reverse order",
|
| 107 |
-
"keep both sides balanced",
|
| 108 |
-
"break the question into known and unknown parts",
|
| 109 |
-
"what is being asked?",
|
| 110 |
-
"what information is given?",
|
| 111 |
-
"translate words into math",
|
| 112 |
-
]
|
| 113 |
-
if any(signal in joined for signal in generic_signals):
|
| 114 |
-
return True
|
| 115 |
-
if topic == "algebra" and "look at the structure" in joined:
|
| 116 |
return True
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
-
def
|
| 120 |
-
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
ratio_text = self._extract_ratio(question_text)
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
"
|
| 128 |
-
"
|
| 129 |
-
"hint_2": "Translate the key relationship in the question into a usable setup.",
|
| 130 |
-
"hint_3": "Check each step against the wording before choosing an option.",
|
| 131 |
"hint_ladder": [
|
| 132 |
-
|
| 133 |
-
"
|
| 134 |
-
"
|
|
|
|
| 135 |
],
|
| 136 |
"walkthrough_steps": [
|
| 137 |
-
|
| 138 |
-
"
|
| 139 |
-
"
|
|
|
|
| 140 |
],
|
| 141 |
"method_steps": [
|
| 142 |
-
"
|
| 143 |
-
"Use the wording to decide which relationship matters most.",
|
| 144 |
],
|
| 145 |
"answer_path": [
|
| 146 |
-
"
|
| 147 |
-
"
|
|
|
|
| 148 |
],
|
| 149 |
-
"common_trap": "
|
| 150 |
}
|
| 151 |
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
"hint_2": "Reverse the operations in a sensible order instead of trying to simplify everything at once.",
|
| 189 |
-
"hint_3": "Only evaluate the target expression after the variables are in a usable form.",
|
| 190 |
-
}
|
| 191 |
-
)
|
| 192 |
-
|
| 193 |
-
elif topic == "percent":
|
| 194 |
-
first_step = "Identify the base quantity before doing any percent calculation."
|
| 195 |
-
if percent_values:
|
| 196 |
-
first_step = f"Track the percentage relationship carefully here: {' then '.join(percent_values[:2]) if len(percent_values) > 1 else percent_values[0]}"
|
| 197 |
-
if "increased by" in question_text.lower() and "decreased by" in question_text.lower():
|
| 198 |
-
generic.update(
|
| 199 |
-
{
|
| 200 |
-
"first_step": "Turn each percentage change into its own multiplier before combining anything.",
|
| 201 |
-
"hint_1": "An increase and a decrease of the same percent do not cancel because they apply to different bases.",
|
| 202 |
-
"hint_2": "Apply the first multiplier, then apply the second multiplier to the updated amount.",
|
| 203 |
-
"hint_3": "Compare the final amount with the original amount only at the end.",
|
| 204 |
-
"hint_ladder": [
|
| 205 |
-
"Turn each percentage change into its own multiplier before combining anything.",
|
| 206 |
-
"Apply the first multiplier, then apply the second multiplier to the updated amount.",
|
| 207 |
-
"Compare the final amount with the original amount only at the end.",
|
| 208 |
-
],
|
| 209 |
-
}
|
| 210 |
-
)
|
| 211 |
-
else:
|
| 212 |
-
generic.update(
|
| 213 |
-
{
|
| 214 |
-
"first_step": first_step,
|
| 215 |
-
"hint_1": "Ask 'percent of what?' so you choose the correct base quantity.",
|
| 216 |
-
"hint_2": "Rewrite the percent as a decimal or fraction if that makes the relationship clearer.",
|
| 217 |
-
"hint_3": "Set up part = percent × base, or reverse that relationship if the base is unknown.",
|
| 218 |
-
"hint_ladder": [
|
| 219 |
-
"Ask 'percent of what?' so you choose the correct base quantity.",
|
| 220 |
-
"Rewrite the percent as a decimal or fraction if that makes the relationship clearer.",
|
| 221 |
-
"Set up part = percent × base, or reverse that relationship if the base is unknown.",
|
| 222 |
-
],
|
| 223 |
-
}
|
| 224 |
-
)
|
| 225 |
-
|
| 226 |
-
elif topic == "ratio":
|
| 227 |
-
first_step = "Keep the ratio order consistent and assign one shared multiplier."
|
| 228 |
-
if ratio_text:
|
| 229 |
-
first_step = f"Use the ratio {ratio_text} as parts of one whole."
|
| 230 |
-
generic.update(
|
| 231 |
-
{
|
| 232 |
-
"first_step": first_step,
|
| 233 |
-
"hint_1": "Write each part of the ratio using the same multiplier.",
|
| 234 |
-
"hint_2": "Use the total or known part to solve for that shared multiplier.",
|
| 235 |
-
"hint_3": "Substitute back into the exact quantity the question asks for.",
|
| 236 |
-
"hint_ladder": [
|
| 237 |
-
"Write each part of the ratio using the same multiplier.",
|
| 238 |
-
"Use the total or known part to solve for that shared multiplier.",
|
| 239 |
-
"Substitute back into the exact quantity the question asks for.",
|
| 240 |
-
],
|
| 241 |
-
"walkthrough_steps": [
|
| 242 |
-
first_step,
|
| 243 |
-
"Represent each ratio part in terms of one common variable such as k.",
|
| 244 |
-
"Use the given total or condition to find k.",
|
| 245 |
-
"Build the requested expression from those ratio parts.",
|
| 246 |
-
],
|
| 247 |
-
"method_steps": [
|
| 248 |
-
"Ratio problems usually become easier once you turn the ratio into matching parts.",
|
| 249 |
-
"Avoid treating ratio numbers as the actual values unless the problem tells you they are.",
|
| 250 |
-
],
|
| 251 |
-
"common_trap": "Using the raw ratio numbers as real values before solving for the common multiplier.",
|
| 252 |
-
}
|
| 253 |
-
)
|
| 254 |
-
|
| 255 |
-
elif topic == "probability":
|
| 256 |
-
generic.update(
|
| 257 |
-
{
|
| 258 |
-
"first_step": "Decide what counts as a successful outcome before you count anything.",
|
| 259 |
-
"hint_1": "Count the favorable outcomes that satisfy the condition.",
|
| 260 |
-
"hint_2": "Count the total possible outcomes in the sample space.",
|
| 261 |
-
"hint_3": "Build the probability as favorable over total, then simplify if needed.",
|
| 262 |
-
"hint_ladder": [
|
| 263 |
-
"Decide what counts as a successful outcome before you count anything.",
|
| 264 |
-
"Count the favorable outcomes that satisfy the condition.",
|
| 265 |
-
"Count the total possible outcomes in the sample space.",
|
| 266 |
-
],
|
| 267 |
-
"walkthrough_steps": [
|
| 268 |
-
"Define the event the question cares about.",
|
| 269 |
-
"Count or construct the favorable cases.",
|
| 270 |
-
"Count the total cases in the sample space.",
|
| 271 |
-
"Write the probability as favorable over total.",
|
| 272 |
-
],
|
| 273 |
-
"method_steps": [
|
| 274 |
-
"Probability questions become clearer once the event and the sample space are both explicit.",
|
| 275 |
-
"Many errors come from counting the wrong denominator, not the numerator.",
|
| 276 |
-
],
|
| 277 |
-
"common_trap": "Changing the denominator incorrectly or forgetting which cases are actually favorable.",
|
| 278 |
-
}
|
| 279 |
-
)
|
| 280 |
-
if "at least" in question_text.lower():
|
| 281 |
-
generic["hint_2"] = "Check whether the complement is easier to count than the requested event."
|
| 282 |
-
generic["hint_ladder"] = [
|
| 283 |
-
generic["hint_1"],
|
| 284 |
-
"Check whether the complement is easier to count than the requested event.",
|
| 285 |
-
generic["hint_3"],
|
| 286 |
-
]
|
| 287 |
-
|
| 288 |
-
elif topic == "statistics":
|
| 289 |
-
qlow = question_text.lower()
|
| 290 |
-
if any(k in qlow for k in ["variability", "spread", "standard deviation"]):
|
| 291 |
-
generic.update(
|
| 292 |
-
{
|
| 293 |
-
"first_step": "Notice that this is about spread, not average.",
|
| 294 |
-
"hint_1": "Use the middle value as a centre and compare how far the outer values sit from it.",
|
| 295 |
-
"hint_2": "A set with values clustered tightly has lower variability than a set spread farther apart.",
|
| 296 |
-
"hint_3": "Choose the set with the widest spread, not the largest mean.",
|
| 297 |
-
"hint_ladder": [
|
| 298 |
-
"Notice that this is about spread, not average.",
|
| 299 |
-
"Use the middle value as a centre and compare how far the outer values sit from it.",
|
| 300 |
-
"Choose the set with the widest spread, not the largest mean.",
|
| 301 |
-
],
|
| 302 |
-
}
|
| 303 |
-
)
|
| 304 |
-
else:
|
| 305 |
-
generic.update(
|
| 306 |
-
{
|
| 307 |
-
"first_step": "Identify which statistical measure the question wants before calculating anything.",
|
| 308 |
-
"hint_1": "Check whether the task is asking for mean, median, range, or another measure.",
|
| 309 |
-
"hint_2": "Organise the data in a clean order if that helps reveal the measure.",
|
| 310 |
-
"hint_3": "Use the exact definition of the requested measure rather than a nearby one.",
|
| 311 |
-
}
|
| 312 |
-
)
|
| 313 |
-
|
| 314 |
-
if has_options:
|
| 315 |
-
generic["answer_path"] = list(generic.get("answer_path", [])) + [
|
| 316 |
-
"Use the answer choices to check which setup fits the question instead of guessing."
|
| 317 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
|
| 319 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
|
| 321 |
def _merge_support_pack(self, generated: Dict[str, Any], stored: Optional[Dict[str, Any]], topic: str) -> Dict[str, Any]:
|
| 322 |
if not stored:
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
return
|
| 326 |
-
|
| 327 |
-
merged = dict(
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
"hint_1",
|
| 334 |
-
"hint_2",
|
| 335 |
-
"hint_3",
|
| 336 |
-
"hint_ladder",
|
| 337 |
-
"walkthrough_steps",
|
| 338 |
-
"method_steps",
|
| 339 |
-
"answer_path",
|
| 340 |
-
"common_trap",
|
| 341 |
-
]:
|
| 342 |
-
if key in generated:
|
| 343 |
-
merged[key] = generated[key]
|
| 344 |
merged["support_source"] = "question_bank_refined"
|
| 345 |
else:
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
return merged
|
| 351 |
|
| 352 |
-
def get_support_pack(
|
| 353 |
-
self,
|
| 354 |
-
*,
|
| 355 |
-
question_id: Optional[str],
|
| 356 |
-
question_text: str,
|
| 357 |
-
options_text: Optional[List[str]],
|
| 358 |
-
topic: Optional[str],
|
| 359 |
-
category: Optional[str],
|
| 360 |
-
) -> Dict[str, Any]:
|
| 361 |
resolved_topic = self._normalize_topic(topic, question_text)
|
| 362 |
generated = self._topic_defaults(resolved_topic, question_text, options_text)
|
| 363 |
-
stored = question_support_bank.get(
|
| 364 |
-
question_id=question_id,
|
| 365 |
-
question_text=question_text,
|
| 366 |
-
options_text=options_text,
|
| 367 |
-
)
|
| 368 |
-
|
| 369 |
pack = self._merge_support_pack(generated, stored, resolved_topic)
|
| 370 |
pack.setdefault("question_id", question_id)
|
| 371 |
pack.setdefault("question_text", question_text)
|
|
@@ -377,24 +435,21 @@ class QuestionFallbackRouter:
|
|
| 377 |
return pack
|
| 378 |
|
| 379 |
def _hint_ladder_from_pack(self, pack: Dict[str, Any]) -> List[str]:
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
hints.append(first_step)
|
| 384 |
for key in ("hint_1", "hint_2", "hint_3"):
|
| 385 |
value = self._clean(pack.get(key))
|
| 386 |
if value:
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
return self._dedupe(
|
| 391 |
|
| 392 |
def _walkthrough_from_pack(self, pack: Dict[str, Any]) -> List[str]:
|
| 393 |
-
lines
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
lines.append(first_step)
|
| 397 |
-
lines.extend(self._listify(pack.get("walkthrough_steps")))
|
| 398 |
return self._dedupe(lines)
|
| 399 |
|
| 400 |
def _method_from_pack(self, pack: Dict[str, Any]) -> List[str]:
|
|
@@ -405,89 +460,57 @@ class QuestionFallbackRouter:
|
|
| 405 |
lines.extend(self._listify(pack.get("method_steps")))
|
| 406 |
lines.extend(self._listify(pack.get("method_explanation")))
|
| 407 |
if not lines:
|
| 408 |
-
lines.extend(self._walkthrough_from_pack(pack)[:
|
| 409 |
return self._dedupe(lines)
|
| 410 |
|
| 411 |
def _answer_path_from_pack(self, pack: Dict[str, Any]) -> List[str]:
|
| 412 |
-
lines
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
lines.append(first_step)
|
| 416 |
-
lines.extend(self._listify(pack.get("answer_path")))
|
| 417 |
return self._dedupe(lines)
|
| 418 |
|
| 419 |
-
def _verbosity_limit(self, verbosity: float, low: int, mid: int, high: int) -> int:
|
| 420 |
-
if verbosity < 0.
|
| 421 |
return low
|
| 422 |
-
if verbosity < 0.
|
| 423 |
return mid
|
| 424 |
return high
|
| 425 |
|
| 426 |
-
def build_response(
|
| 427 |
-
self,
|
| 428 |
-
*,
|
| 429 |
-
question_id: Optional[str],
|
| 430 |
-
question_text: str,
|
| 431 |
-
options_text: Optional[List[str]],
|
| 432 |
-
topic: Optional[str],
|
| 433 |
-
category: Optional[str],
|
| 434 |
-
help_mode: str,
|
| 435 |
-
hint_stage: int,
|
| 436 |
-
verbosity: float,
|
| 437 |
-
) -> Dict[str, Any]:
|
| 438 |
-
pack = self.get_support_pack(
|
| 439 |
-
question_id=question_id,
|
| 440 |
-
question_text=question_text,
|
| 441 |
-
options_text=options_text,
|
| 442 |
-
topic=topic,
|
| 443 |
-
category=category,
|
| 444 |
-
)
|
| 445 |
-
|
| 446 |
mode = (help_mode or "answer").lower()
|
| 447 |
-
stage = max(1,
|
| 448 |
-
|
| 449 |
-
first_step = self._clean(pack.get("first_step"))
|
| 450 |
hint_ladder = self._hint_ladder_from_pack(pack)
|
| 451 |
walkthrough_steps = self._walkthrough_from_pack(pack)
|
| 452 |
method_steps = self._method_from_pack(pack)
|
| 453 |
answer_path = self._answer_path_from_pack(pack)
|
| 454 |
common_trap = self._clean(pack.get("common_trap"))
|
| 455 |
|
| 456 |
-
lines: List[str] = []
|
| 457 |
-
|
| 458 |
if mode == "hint":
|
|
|
|
| 459 |
if hint_ladder:
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
|
|
|
| 464 |
else:
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
selected.append(f"Watch out for this trap: {common_trap}")
|
| 468 |
-
lines = self._dedupe(selected)
|
| 469 |
-
|
| 470 |
-
elif mode in {"walkthrough", "step_by_step", "instruction"}:
|
| 471 |
source = walkthrough_steps or answer_path or hint_ladder
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
if verbosity >= 0.7 and common_trap:
|
| 475 |
lines = list(lines) + [f"Watch out for this trap: {common_trap}"]
|
| 476 |
-
|
| 477 |
elif mode in {"method", "explain", "concept", "definition"}:
|
| 478 |
-
source = method_steps or walkthrough_steps or answer_path
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
if verbosity >= 0.65 and common_trap:
|
| 482 |
lines = list(lines) + [f"Common trap: {common_trap}"]
|
| 483 |
-
|
| 484 |
else:
|
| 485 |
source = answer_path or walkthrough_steps or hint_ladder
|
| 486 |
-
|
| 487 |
-
lines = source[:limit] if source else [first_step or "Start by identifying the relationship in the question."]
|
| 488 |
|
| 489 |
-
lines
|
| 490 |
-
return {"lines": lines, "pack": pack}
|
| 491 |
|
| 492 |
|
| 493 |
-
question_fallback_router = QuestionFallbackRouter()
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
import re
|
| 4 |
from typing import Any, Dict, List, Optional
|
| 5 |
|
| 6 |
from question_support_loader import question_support_bank
|
| 7 |
|
| 8 |
|
| 9 |
+
# Lowercased boilerplate lines that carry no question-specific guidance.
# Entries keep their trailing punctuation because they are compared against
# whole lowercased lines, not substrings.
GENERIC_MARKERS = {
    # Stock algebra advice.
    "write the equation clearly and identify the variable.",
    "undo operations in reverse order.",
    "keep both sides balanced while isolating the variable.",
    # Stock problem-solving outline.
    "understand the problem.",
    "identify variables.",
    "set up relationships.",
    "solve step by step.",
    # Stock prompting questions.
    "what is being asked?",
    "what information is given?",
    "how can you link them mathematically?",
}
|
| 21 |
+
|
| 22 |
+
|
| 23 |
class QuestionFallbackRouter:
|
| 24 |
def _clean(self, text: Optional[str]) -> str:
|
| 25 |
return (text or "").strip()
|
|
|
|
| 32 |
if isinstance(value, tuple):
|
| 33 |
return [str(v).strip() for v in value if str(v).strip()]
|
| 34 |
if isinstance(value, str):
|
| 35 |
+
return [value.strip()] if value.strip() else []
|
|
|
|
| 36 |
return []
|
| 37 |
|
| 38 |
def _dedupe(self, items: List[str]) -> List[str]:
|
|
|
|
| 49 |
def _normalize_topic(self, topic: Optional[str], question_text: str) -> str:
|
| 50 |
q = (question_text or "").lower()
|
| 51 |
t = (topic or "").strip().lower()
|
|
|
|
| 52 |
if t and t not in {"general", "unknown", "general_quant", "quant"}:
|
| 53 |
return t
|
|
|
|
|
|
|
| 54 |
if "%" in q or "percent" in q:
|
| 55 |
return "percent"
|
| 56 |
+
if "probability" in q or "chance" in q or "at random" in q or "odds" in q:
|
|
|
|
|
|
|
| 57 |
return "probability"
|
| 58 |
+
if "ratio" in q or re.search(r"\d+\s*:\s*\d+", q):
|
| 59 |
+
return "ratio"
|
| 60 |
+
if any(k in q for k in ["mean", "median", "standard deviation", "variability", "spread"]):
|
| 61 |
+
return "statistics"
|
| 62 |
+
if any(k in q for k in ["remainder", "prime", "divisible", "factor", "multiple"]):
|
| 63 |
return "number_theory"
|
| 64 |
+
if any(k in q for k in ["rectangle", "triangle", "circle", "perimeter", "area"]):
|
| 65 |
return "geometry"
|
| 66 |
+
if "=" in q or re.search(r"[xyzabn]", q):
|
|
|
|
|
|
|
| 67 |
return "algebra"
|
| 68 |
return "general"
|
| 69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
def _extract_equation(self, question_text: str) -> Optional[str]:
|
| 71 |
+
m = re.search(r"([^?]*=[^?]*)", self._clean(question_text))
|
| 72 |
+
return self._clean(m.group(1)) if m else None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
def _extract_ratio(self, question_text: str) -> Optional[str]:
|
| 75 |
+
m = re.search(r"(\d+\s*:\s*\d+)", question_text or "")
|
| 76 |
+
return self._clean(m.group(1)) if m else None
|
| 77 |
+
|
| 78 |
+
def _looks_like_simple_linear(self, question_text: str) -> bool:
|
| 79 |
+
q = (question_text or "").lower()
|
| 80 |
+
return bool("=" in q and re.search(r"what is\s+[a-z]", q))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
def _pack_looks_generic(self, pack: Dict[str, Any], topic: str) -> bool:
    """Report whether a support pack holds only boilerplate guidance.

    Every text field of the pack is lowercased and collected. A line counts
    as meaningful when it is not listed in GENERIC_MARKERS and has at least
    five words. A pack matched by question id, exact signature or exact text
    needs two meaningful lines to be kept; any other pack needs three.

    Args:
        pack: Support pack dictionary; an empty or missing pack is generic.
        topic: Accepted for interface compatibility; not consulted here.

    Returns:
        True when the pack should be treated as generic.
    """
    if not pack:
        return True

    lines: List[str] = []
    for key in ("first_step", "hint_1", "hint_2", "hint_3", "common_trap", "concept"):
        value = self._clean(pack.get(key))
        if value:
            lines.append(value.lower())
    for key in ("hint_ladder", "walkthrough_steps", "method_steps", "method_explanation", "answer_path"):
        lines.extend(x.lower() for x in self._listify(pack.get(key)))
    if not lines:
        return True

    meaningful = sum(
        1 for line in lines
        if line not in GENERIC_MARKERS and len(line.split()) >= 5
    )

    # "support_match" may be present but None or a non-dict value; calling
    # .get on it directly would raise instead of treating the match as unknown.
    match_info = pack.get("support_match")
    match_mode = match_info.get("mode") if isinstance(match_info, dict) else None
    if match_mode in {"question_id", "signature_exact", "text_exact"} and meaningful >= 2:
        return False
    return meaningful < 3
|
| 101 |
+
|
| 102 |
+
def _algebra_pack(self, question_text: str) -> Dict[str, Any]:
|
| 103 |
+
eq = self._extract_equation(question_text) or "the equation"
|
| 104 |
+
if self._looks_like_simple_linear(question_text):
|
| 105 |
+
return {
|
| 106 |
+
"first_step": f"Start with {eq} and undo the outer operation around the variable first.",
|
| 107 |
+
"hint_1": "Move the constant term on the variable side by doing the opposite operation to both sides.",
|
| 108 |
+
"hint_2": "Once the variable term is isolated, undo the coefficient on the variable.",
|
| 109 |
+
"hint_3": "Check your value by substituting it back into the original equation.",
|
| 110 |
+
"hint_ladder": [
|
| 111 |
+
f"Look at {eq} and ask which operation is happening to the variable last.",
|
| 112 |
+
"Undo the constant attached to the variable side by using the opposite operation on both sides.",
|
| 113 |
+
"After that, undo the multiplication or division on the variable itself.",
|
| 114 |
+
"Substitute your candidate value back in to verify it reproduces the original right-hand side.",
|
| 115 |
+
],
|
| 116 |
+
"walkthrough_steps": [
|
| 117 |
+
f"Rewrite the equation cleanly: {eq}.",
|
| 118 |
+
"Undo the addition or subtraction around the variable by applying the opposite operation to both sides.",
|
| 119 |
+
"Then undo the multiplication or division on the variable.",
|
| 120 |
+
"Check the result in the original equation, not just the simplified one.",
|
| 121 |
+
],
|
| 122 |
+
"method_steps": [
|
| 123 |
+
"Linear equations are usually solved by reversing operations in the opposite order from how they affect the variable.",
|
| 124 |
+
"Keeping both sides balanced is what lets every step stay equivalent to the original equation.",
|
| 125 |
+
],
|
| 126 |
+
"answer_path": [
|
| 127 |
+
"Reverse the outer operation first.",
|
| 128 |
+
"Then remove the coefficient from the variable.",
|
| 129 |
+
"Verify by substitution.",
|
| 130 |
+
],
|
| 131 |
+
"common_trap": "Undoing the coefficient before removing the constant on the variable side.",
|
| 132 |
+
}
|
| 133 |
+
return {
|
| 134 |
+
"first_step": f"Rewrite {eq} in a clean algebraic form before manipulating it.",
|
| 135 |
+
"hint_1": "Decide which quantity is unknown and which relationships are given.",
|
| 136 |
+
"hint_2": "Set up one equation at a time from those relationships.",
|
| 137 |
+
"hint_3": "Only simplify after the structure is correct.",
|
| 138 |
+
"walkthrough_steps": [
|
| 139 |
+
"Name the unknown quantity clearly.",
|
| 140 |
+
"Translate each condition into an equation or constraint.",
|
| 141 |
+
"Simplify the algebra only after the setup is correct.",
|
| 142 |
+
],
|
| 143 |
+
"method_steps": [
|
| 144 |
+
"Algebra questions are easiest when you translate the wording into relationships before calculating.",
|
| 145 |
+
],
|
| 146 |
+
"answer_path": [
|
| 147 |
+
"Identify the unknown.",
|
| 148 |
+
"Build the equation.",
|
| 149 |
+
"Isolate the target quantity.",
|
| 150 |
+
],
|
| 151 |
+
"common_trap": "Starting calculations before defining the unknown or building the equation correctly.",
|
| 152 |
+
}
|
| 153 |
|
| 154 |
+
def _percent_pack(self, question_text: str) -> Dict[str, Any]:
|
| 155 |
+
q = question_text.lower()
|
| 156 |
+
if "increase" in q or "decrease" in q:
|
| 157 |
+
return {
|
| 158 |
+
"first_step": "Turn each percent change into its own multiplier before combining anything.",
|
| 159 |
+
"hint_1": "Use the original amount as a clean base, often 100, unless the question already gives a convenient number.",
|
| 160 |
+
"hint_2": "Apply the first percentage change to the current amount, not the final amount.",
|
| 161 |
+
"hint_3": "Apply the second change to the updated amount, then compare with the original only at the end.",
|
| 162 |
+
"hint_ladder": [
|
| 163 |
+
"Treat a percent increase or decrease as multiplication, not simple adding or subtracting percentages.",
|
| 164 |
+
"Apply the first multiplier to the starting amount.",
|
| 165 |
+
"Apply the second multiplier to that new amount.",
|
| 166 |
+
"Compare the final result with the original base only after both changes are done.",
|
| 167 |
+
],
|
| 168 |
+
"walkthrough_steps": [
|
| 169 |
+
"Choose an easy original value such as 100 if no starting number is given.",
|
| 170 |
+
"Convert each percentage change into a multiplier.",
|
| 171 |
+
"Apply the multipliers in sequence.",
|
| 172 |
+
"Express the final amount relative to the original amount.",
|
| 173 |
+
],
|
| 174 |
+
"method_steps": [
|
| 175 |
+
"Successive percent changes are multiplicative because each new percent acts on the current amount.",
|
| 176 |
+
"That is why equal increases and decreases do not cancel each other out.",
|
| 177 |
+
],
|
| 178 |
+
"answer_path": [
|
| 179 |
+
"Pick a clean base value.",
|
| 180 |
+
"Apply each change in order.",
|
| 181 |
+
"Compare final with original.",
|
| 182 |
+
],
|
| 183 |
+
"common_trap": "Adding and subtracting the percentages directly instead of applying them sequentially.",
|
| 184 |
+
}
|
| 185 |
+
return {
|
| 186 |
+
"first_step": "Ask 'percent of what?' before writing any equation.",
|
| 187 |
+
"hint_1": "Separate the part, the percent, and the base quantity.",
|
| 188 |
+
"hint_2": "Write the relationship as part = percent × base, or reverse it if the base is unknown.",
|
| 189 |
+
"hint_3": "Only convert to a final percent form after the relationship is set up correctly.",
|
| 190 |
+
"walkthrough_steps": [
|
| 191 |
+
"Identify the base amount the percent is taken from.",
|
| 192 |
+
"Write the percent as a decimal or fraction.",
|
| 193 |
+
"Set up the percent relationship.",
|
| 194 |
+
"Solve for the requested quantity.",
|
| 195 |
+
],
|
| 196 |
+
"method_steps": [
|
| 197 |
+
"Most percent errors come from choosing the wrong base quantity, not from arithmetic.",
|
| 198 |
+
],
|
| 199 |
+
"answer_path": [
|
| 200 |
+
"Identify the base quantity.",
|
| 201 |
+
"Set up the percent relationship.",
|
| 202 |
+
"Solve for the target.",
|
| 203 |
+
],
|
| 204 |
+
"common_trap": "Using the part as the base or applying the percent to the wrong quantity.",
|
| 205 |
+
}
|
| 206 |
+
|
| 207 |
+
def _ratio_pack(self, question_text: str) -> Dict[str, Any]:
    """Return the generated support pack for a ratio question.

    The opening line quotes the ratio returned by ``self._extract_ratio``
    when that call yields something truthy, and falls back to a generic
    sentence otherwise. The same opening line is reused as the first
    entry of both ``hint_ladder`` and ``walkthrough_steps``.
    """
    ratio_text = self._extract_ratio(question_text)
    if ratio_text:
        first = f"Treat {ratio_text} as matching parts of one whole."
    else:
        first = "Treat the ratio numbers as parts, not final values."
    return {
        "first_step": first,
        "hint_1": "Represent each ratio part using one shared multiplier such as k.",
        "hint_2": "Use the given total or condition to find that shared multiplier.",
        "hint_3": "Substitute back into the exact quantity the question asks for.",
        "hint_ladder": [
            first,
            "Write each quantity as a ratio part times the same multiplier.",
            "Use the total or condition to solve for the multiplier.",
            "Build the requested expression from the actual quantities, not the raw ratio numbers.",
        ],
        "walkthrough_steps": [
            first,
            "Assign variables to each ratio part using one multiplier.",
            "Solve for the multiplier from the given condition.",
            "Evaluate the requested quantity.",
        ],
        "method_steps": [
            "Ratio questions simplify when you convert the ratio into actual quantities with one shared multiplier.",
        ],
        "answer_path": [
            "Write each part with a common multiplier.",
            "Solve for the multiplier.",
            "Substitute into the target expression.",
        ],
        "common_trap": "Using the raw ratio numbers as actual values instead of scaled parts.",
    }
|
| 237 |
|
| 238 |
+
def _probability_pack(self, question_text: str) -> Dict[str, Any]:
|
| 239 |
+
q = question_text.lower()
|
| 240 |
+
pack = {
|
| 241 |
+
"first_step": "Define exactly what counts as a successful outcome before you count anything.",
|
| 242 |
+
"hint_1": "Count the favorable outcomes that satisfy the condition.",
|
| 243 |
+
"hint_2": "Count the total possible outcomes in the sample space.",
|
| 244 |
+
"hint_3": "Write probability as favorable over total, then simplify only at the end.",
|
| 245 |
+
"hint_ladder": [
|
| 246 |
+
"State the event in plain language: what outcome are you trying to get?",
|
| 247 |
+
"Count the favorable cases for that event.",
|
| 248 |
+
"Count the total possible cases in the sample space.",
|
| 249 |
+
"Build the probability as favorable over total.",
|
| 250 |
+
],
|
| 251 |
+
"walkthrough_steps": [
|
| 252 |
+
"Define the event the question cares about.",
|
| 253 |
+
"Count the favorable cases.",
|
| 254 |
+
"Count the total possible cases.",
|
| 255 |
+
"Write the probability as favorable divided by total.",
|
| 256 |
+
],
|
| 257 |
+
"method_steps": [
|
| 258 |
+
"Probability becomes much easier once the event and sample space are both explicit.",
|
| 259 |
+
"Many mistakes come from counting the wrong denominator, not the numerator.",
|
| 260 |
+
],
|
| 261 |
+
"answer_path": [
|
| 262 |
+
"Define the event.",
|
| 263 |
+
"Count favorable outcomes.",
|
| 264 |
+
"Count total outcomes.",
|
| 265 |
+
],
|
| 266 |
+
"common_trap": "Using the wrong denominator or forgetting outcomes that belong in the sample space.",
|
| 267 |
+
}
|
| 268 |
+
if "at least" in q or "at most" in q:
|
| 269 |
+
pack["hint_ladder"] = [
|
| 270 |
+
"Check whether the complement is easier to count than the event you want.",
|
| 271 |
+
"Count the easier side first if that reduces the work.",
|
| 272 |
+
"Convert back to the requested event at the end.",
|
| 273 |
+
"Then write the probability with the correct denominator.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
]
|
| 275 |
+
return pack
|
| 276 |
+
|
| 277 |
+
def _statistics_pack(self, question_text: str) -> Dict[str, Any]:
|
| 278 |
+
q = question_text.lower()
|
| 279 |
+
if any(k in q for k in ["variability", "spread", "standard deviation"]):
|
| 280 |
+
return {
|
| 281 |
+
"first_step": "Notice that the question is about spread, not average.",
|
| 282 |
+
"hint_1": "Compare how far the values sit from the centre of each set.",
|
| 283 |
+
"hint_2": "A set with values clustered tightly has lower variability than a set spread farther apart.",
|
| 284 |
+
"hint_3": "Pick the set with the widest spread, not the highest mean.",
|
| 285 |
+
"hint_ladder": [
|
| 286 |
+
"Ignore the mean at first and focus on how spread out the values are.",
|
| 287 |
+
"Compare the distance of the outer values from the middle of each set.",
|
| 288 |
+
"The set with the wider spread has greater variability.",
|
| 289 |
+
],
|
| 290 |
+
"walkthrough_steps": [
|
| 291 |
+
"Identify the centre of each set mentally or numerically.",
|
| 292 |
+
"Compare how tightly the values cluster around that centre.",
|
| 293 |
+
"Choose the set with the larger spread.",
|
| 294 |
+
],
|
| 295 |
+
"method_steps": [
|
| 296 |
+
"Variability measures spread, so a dataset can have the same mean as another and still be more variable.",
|
| 297 |
+
],
|
| 298 |
+
"answer_path": [
|
| 299 |
+
"Look at spread around the centre.",
|
| 300 |
+
"Compare clustering versus spread.",
|
| 301 |
+
],
|
| 302 |
+
"common_trap": "Choosing the set with the highest mean instead of the greatest spread.",
|
| 303 |
+
}
|
| 304 |
+
return {
|
| 305 |
+
"first_step": "Identify which statistical measure the question cares about before calculating.",
|
| 306 |
+
"hint_1": "Decide whether the task is about mean, median, range, spread, or another measure.",
|
| 307 |
+
"hint_2": "Organise the data cleanly if that makes the measure easier to see.",
|
| 308 |
+
"hint_3": "Use the exact definition of the requested measure.",
|
| 309 |
+
"walkthrough_steps": [
|
| 310 |
+
"Identify the requested statistic.",
|
| 311 |
+
"Organise the data.",
|
| 312 |
+
"Apply the definition of that statistic.",
|
| 313 |
+
],
|
| 314 |
+
"method_steps": [
|
| 315 |
+
"Statistics questions are easiest once you know which measure matters.",
|
| 316 |
+
],
|
| 317 |
+
"answer_path": [
|
| 318 |
+
"Identify the target statistic.",
|
| 319 |
+
"Apply its definition.",
|
| 320 |
+
],
|
| 321 |
+
"common_trap": "Using a nearby but different statistical measure.",
|
| 322 |
+
}
|
| 323 |
|
| 324 |
+
def _geometry_pack(self, question_text: str) -> Dict[str, Any]:
|
| 325 |
+
q = question_text.lower()
|
| 326 |
+
if "perimeter" in q and "rectangle" in q:
|
| 327 |
+
return {
|
| 328 |
+
"first_step": "Start with the perimeter formula for a rectangle: 2L + 2W.",
|
| 329 |
+
"hint_1": "Substitute the known perimeter and known side length into the formula.",
|
| 330 |
+
"hint_2": "Isolate the remaining side length after substitution.",
|
| 331 |
+
"hint_3": "Check the width in the perimeter formula once more.",
|
| 332 |
+
"walkthrough_steps": [
|
| 333 |
+
"Write the perimeter formula.",
|
| 334 |
+
"Plug in the given perimeter and length.",
|
| 335 |
+
"Solve for the width.",
|
| 336 |
+
],
|
| 337 |
+
"method_steps": [
|
| 338 |
+
"Geometry questions are often formula-matching questions first and algebra questions second.",
|
| 339 |
+
],
|
| 340 |
+
"answer_path": [
|
| 341 |
+
"Write the formula.",
|
| 342 |
+
"Substitute given values.",
|
| 343 |
+
"Solve for the missing side.",
|
| 344 |
+
],
|
| 345 |
+
"common_trap": "Forgetting that perimeter includes both lengths and both widths.",
|
| 346 |
+
}
|
| 347 |
+
return {
|
| 348 |
+
"first_step": "Identify the shape and the formula that matches it.",
|
| 349 |
+
"hint_1": "Write the relevant geometry formula before substituting numbers.",
|
| 350 |
+
"hint_2": "Substitute carefully and keep track of what the question actually asks for.",
|
| 351 |
+
"hint_3": "Use algebra only after the correct formula is in place.",
|
| 352 |
+
"walkthrough_steps": [
|
| 353 |
+
"Identify the shape.",
|
| 354 |
+
"Choose the correct formula.",
|
| 355 |
+
"Substitute values and solve.",
|
| 356 |
+
],
|
| 357 |
+
"method_steps": [
|
| 358 |
+
"Most geometry errors come from choosing the wrong formula or solving for the wrong quantity.",
|
| 359 |
+
],
|
| 360 |
+
"answer_path": [
|
| 361 |
+
"Match the shape to its formula.",
|
| 362 |
+
"Substitute the known values.",
|
| 363 |
+
"Solve for the target quantity.",
|
| 364 |
+
],
|
| 365 |
+
"common_trap": "Using the wrong formula or solving for the wrong dimension.",
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
def _topic_defaults(self, topic: str, question_text: str, options_text: Optional[List[str]]) -> Dict[str, Any]:
    """Return the generated support pack for ``topic``.

    The topic is first passed through ``self._normalize_topic``. The six
    recognised topics are delegated to their dedicated pack builders;
    anything else gets a general quant pack. ``options_text`` is accepted
    for interface compatibility but is not read here.
    """
    topic = self._normalize_topic(topic, question_text)
    builders = {
        "algebra": self._algebra_pack,
        "percent": self._percent_pack,
        "ratio": self._ratio_pack,
        "probability": self._probability_pack,
        "statistics": self._statistics_pack,
        "geometry": self._geometry_pack,
    }
    builder = builders.get(topic)
    if builder is not None:
        return builder(question_text)
    return {
        "first_step": "Identify the exact relationship the question is testing before doing any arithmetic.",
        "hint_1": "Separate what is given from what you need to find.",
        "hint_2": "Build the relationship or formula that links those pieces.",
        "hint_3": "Only calculate after the structure is correct.",
        "walkthrough_steps": [
            "State what is given and what is unknown.",
            "Build the relationship between them.",
            "Solve for the requested quantity.",
        ],
        "method_steps": [
            "General quant questions become clearer when you translate the wording into a structure first.",
        ],
        "answer_path": [
            "Identify the structure.",
            "Set up the relationship.",
            "Solve the target quantity.",
        ],
        "common_trap": "Starting arithmetic before the structure of the problem is clear.",
    }
|
| 402 |
|
| 403 |
def _merge_support_pack(self, generated: Dict[str, Any], stored: Optional[Dict[str, Any]], topic: str) -> Dict[str, Any]:
|
| 404 |
if not stored:
|
| 405 |
+
out = dict(generated)
|
| 406 |
+
out["support_source"] = "generated_question_specific"
|
| 407 |
+
return out
|
| 408 |
+
|
| 409 |
+
merged = dict(stored)
|
| 410 |
+
looks_generic = self._pack_looks_generic(stored, topic)
|
| 411 |
+
if looks_generic:
|
| 412 |
+
for key, value in generated.items():
|
| 413 |
+
if value:
|
| 414 |
+
merged[key] = value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 415 |
merged["support_source"] = "question_bank_refined"
|
| 416 |
else:
|
| 417 |
+
for key, value in generated.items():
|
| 418 |
+
if key not in merged or not merged.get(key):
|
| 419 |
+
merged[key] = value
|
| 420 |
+
merged["support_source"] = "question_bank"
|
| 421 |
return merged
|
| 422 |
|
| 423 |
+
def get_support_pack(self, *, question_id: Optional[str], question_text: str, options_text: Optional[List[str]], topic: Optional[str], category: Optional[str]) -> Dict[str, Any]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 424 |
resolved_topic = self._normalize_topic(topic, question_text)
|
| 425 |
generated = self._topic_defaults(resolved_topic, question_text, options_text)
|
| 426 |
+
stored = question_support_bank.get(question_id=question_id, question_text=question_text, options_text=options_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
pack = self._merge_support_pack(generated, stored, resolved_topic)
|
| 428 |
pack.setdefault("question_id", question_id)
|
| 429 |
pack.setdefault("question_text", question_text)
|
|
|
|
| 435 |
return pack
|
| 436 |
|
| 437 |
def _hint_ladder_from_pack(self, pack: Dict[str, Any]) -> List[str]:
    """Flatten every hint-like field of ``pack`` into one ordered list.

    Order: ``first_step``, then ``hint_1``..``hint_3``, then the
    ``hint_ladder`` entries, then the ``hints`` entries. Scalar fields are
    passed through ``self._clean`` and skipped when the result is falsy;
    list fields go through ``self._listify``. The combined list is
    returned via ``self._dedupe``.
    """
    lines: List[str] = []
    # assumes _clean has no side effects, so one call per field suffices
    for key in ("first_step", "hint_1", "hint_2", "hint_3"):
        value = self._clean(pack.get(key))
        if value:
            lines.append(value)
    lines.extend(self._listify(pack.get("hint_ladder")))
    lines.extend(self._listify(pack.get("hints")))
    return self._dedupe(lines)
|
| 448 |
|
| 449 |
def _walkthrough_from_pack(self, pack: Dict[str, Any]) -> List[str]:
    """Return the walkthrough lines of ``pack``.

    Uses ``walkthrough_steps`` (via ``self._listify``). When that yields
    nothing, the cleaned ``first_step`` is used as a single-line fallback
    if it is truthy. The result is returned via ``self._dedupe``.
    """
    lines = self._listify(pack.get("walkthrough_steps"))
    if not lines:
        # assumes _clean has no side effects, so it is evaluated once
        first_step = self._clean(pack.get("first_step"))
        if first_step:
            lines.append(first_step)
    return self._dedupe(lines)
|
| 454 |
|
| 455 |
def _method_from_pack(self, pack: Dict[str, Any]) -> List[str]:
|
|
|
|
| 460 |
lines.extend(self._listify(pack.get("method_steps")))
|
| 461 |
lines.extend(self._listify(pack.get("method_explanation")))
|
| 462 |
if not lines:
|
| 463 |
+
lines.extend(self._walkthrough_from_pack(pack)[:2])
|
| 464 |
return self._dedupe(lines)
|
| 465 |
|
| 466 |
def _answer_path_from_pack(self, pack: Dict[str, Any]) -> List[str]:
    """Return the answer-path lines of ``pack``.

    Uses ``answer_path`` (via ``self._listify``) and falls back to
    ``self._walkthrough_from_pack(pack)`` when that is empty. The result
    is returned via ``self._dedupe``.
    """
    lines = self._listify(pack.get("answer_path"))
    if not lines:
        lines = self._walkthrough_from_pack(pack)
    return self._dedupe(lines)
|
| 471 |
|
| 472 |
+
def _verbosity_limit(self, verbosity: float, *, low: int, mid: int, high: int) -> int:
|
| 473 |
+
if verbosity < 0.28:
|
| 474 |
return low
|
| 475 |
+
if verbosity < 0.68:
|
| 476 |
return mid
|
| 477 |
return high
|
| 478 |
|
| 479 |
+
def build_response(self, *, question_id: Optional[str], question_text: str, options_text: Optional[List[str]], topic: Optional[str], category: Optional[str], help_mode: str, hint_stage: int, verbosity: float) -> Dict[str, Any]:
    """Build the fallback reply lines for a question in the given help mode.

    The support pack is fetched with ``self.get_support_pack`` and sliced
    according to ``help_mode`` (case-insensitive; falsy means "answer"):

    * ``hint``: the ladder entry for ``hint_stage`` (1-based, clamped to
      the ladder length), plus the next entry when ``verbosity >= 0.62``
      and the common trap when ``verbosity >= 0.82`` at stage 3 or later.
    * ``walkthrough`` / ``instruction`` / ``step_by_step``: the first
      2/4/6 walkthrough lines, plus the trap when ``verbosity >= 0.8``.
    * ``method`` / ``explain`` / ``concept`` / ``definition``: the first
      1/2/4 method lines, plus the trap when ``verbosity >= 0.72``.
    * anything else: the first 2/3/5 answer-path lines.

    Returns:
        ``{"lines": <deduplicated lines>, "pack": <support pack>}``.
    """
    pack = self.get_support_pack(question_id=question_id, question_text=question_text, options_text=options_text, topic=topic, category=category)
    mode = (help_mode or "answer").lower()
    # Stages are 1-based; falsy or sub-1 values are treated as stage 1.
    stage = max(1, int(hint_stage or 1))
    hint_ladder = self._hint_ladder_from_pack(pack)
    walkthrough_steps = self._walkthrough_from_pack(pack)
    method_steps = self._method_from_pack(pack)
    answer_path = self._answer_path_from_pack(pack)
    common_trap = self._clean(pack.get("common_trap"))

    if mode == "hint":
        # Clamp so stages past the end keep returning the last hint.
        idx = min(stage - 1, max(len(hint_ladder) - 1, 0))
        if hint_ladder:
            lines = [hint_ladder[idx]]
            if verbosity >= 0.62 and idx + 1 < len(hint_ladder):
                lines.append(hint_ladder[idx + 1])
            if verbosity >= 0.82 and stage >= 3 and common_trap:
                lines.append(f"Watch out for this trap: {common_trap}")
        else:
            lines = [self._clean(pack.get("first_step")) or "Start with the structure of the problem."]
    elif mode in {"walkthrough", "instruction", "step_by_step"}:
        source = walkthrough_steps or answer_path or hint_ladder
        lines = source[: self._verbosity_limit(verbosity, low=2, mid=4, high=6)]
        if verbosity >= 0.8 and common_trap:
            lines = list(lines) + [f"Watch out for this trap: {common_trap}"]
    elif mode in {"method", "explain", "concept", "definition"}:
        source = method_steps or walkthrough_steps or answer_path
        lines = source[: self._verbosity_limit(verbosity, low=1, mid=2, high=4)]
        if verbosity >= 0.72 and common_trap:
            lines = list(lines) + [f"Common trap: {common_trap}"]
    else:
        source = answer_path or walkthrough_steps or hint_ladder
        lines = source[: self._verbosity_limit(verbosity, low=2, mid=3, high=5)]

    return {"lines": self._dedupe(lines), "pack": pack}
|
|
|
|
| 514 |
|
| 515 |
|
| 516 |
+
question_fallback_router = QuestionFallbackRouter()
|
question_support_loader.py
CHANGED
|
@@ -1,45 +1,66 @@
|
|
|
|
|
|
|
|
| 1 |
import json
|
| 2 |
import re
|
|
|
|
| 3 |
from pathlib import Path
|
| 4 |
-
from typing import Any, Dict, List, Optional
|
| 5 |
|
| 6 |
|
| 7 |
class QuestionSupportBank:
|
|
|
|
|
|
|
| 8 |
def __init__(self, data_path: Optional[str] = None) -> None:
|
| 9 |
base_dir = Path(__file__).resolve().parent
|
| 10 |
self.data_path = Path(data_path) if data_path else base_dir / "data" / "question_support_bank.jsonl"
|
| 11 |
self._loaded = False
|
|
|
|
| 12 |
self._by_id: Dict[str, Dict[str, Any]] = {}
|
| 13 |
self._by_text: Dict[str, Dict[str, Any]] = {}
|
| 14 |
self._by_signature: Dict[str, Dict[str, Any]] = {}
|
| 15 |
-
self.
|
| 16 |
|
| 17 |
def _normalize(self, text: Optional[str]) -> str:
|
| 18 |
cleaned = (text or "").strip().lower()
|
| 19 |
-
cleaned = cleaned.replace("’", "'")
|
|
|
|
| 20 |
cleaned = re.sub(r"\s+", " ", cleaned)
|
| 21 |
-
|
|
|
|
| 22 |
|
| 23 |
def _tokenize(self, text: Optional[str]) -> List[str]:
|
| 24 |
-
return re.findall(r"[a-z0-9%/]+", self._normalize(text))
|
| 25 |
|
| 26 |
def _normalize_choice(self, value: Any) -> str:
|
| 27 |
return self._normalize(str(value) if value is not None else "")
|
| 28 |
|
| 29 |
-
def
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
return " || ".join(cleaned)
|
| 32 |
|
| 33 |
-
def _question_signature(self, question_text: Optional[str], choices: Optional[List[Any]] = None) -> str:
|
| 34 |
q = self._normalize(question_text)
|
| 35 |
-
c = self._choice_signature(choices)
|
| 36 |
return f"{q} ## {c}" if c else q
|
| 37 |
|
| 38 |
def load(self) -> None:
|
|
|
|
| 39 |
self._by_id = {}
|
| 40 |
self._by_text = {}
|
| 41 |
self._by_signature = {}
|
| 42 |
-
self.
|
| 43 |
|
| 44 |
if self.data_path.exists():
|
| 45 |
with self.data_path.open("r", encoding="utf-8") as handle:
|
|
@@ -62,102 +83,113 @@ class QuestionSupportBank:
|
|
| 62 |
def _store_item(self, item: Dict[str, Any]) -> None:
|
| 63 |
if not isinstance(item, dict):
|
| 64 |
return
|
| 65 |
-
|
| 66 |
stored = dict(item)
|
| 67 |
-
qid = str(stored.get("question_id") or "").strip()
|
| 68 |
stem = stored.get("question_text") or stored.get("stem") or ""
|
| 69 |
choices = stored.get("options_text") or stored.get("choices") or []
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
signature = self._question_signature(stem, choices)
|
|
|
|
| 73 |
|
| 74 |
if qid:
|
| 75 |
self._by_id[qid] = stored
|
| 76 |
-
if
|
| 77 |
-
self._by_text[
|
| 78 |
if signature:
|
| 79 |
self._by_signature[signature] = stored
|
| 80 |
-
|
|
|
|
| 81 |
self._items.append(stored)
|
| 82 |
|
| 83 |
-
def
|
| 84 |
-
self,
|
| 85 |
-
*,
|
| 86 |
-
query_text: str,
|
| 87 |
-
query_choices: Optional[List[Any]],
|
| 88 |
-
candidate: Dict[str, Any],
|
| 89 |
-
) -> Tuple[float, float, float]:
|
| 90 |
cand_text = candidate.get("question_text") or candidate.get("stem") or ""
|
| 91 |
cand_choices = candidate.get("options_text") or candidate.get("choices") or []
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
q_tokens = set(self._tokenize(query_text))
|
| 94 |
c_tokens = set(self._tokenize(cand_text))
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
if
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
self._ensure_loaded()
|
| 118 |
qid = str(question_id or "").strip()
|
| 119 |
if qid and qid in self._by_id:
|
| 120 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
qtext = self._normalize(question_text)
|
| 123 |
if qtext and qtext in self._by_text:
|
| 124 |
-
return
|
| 125 |
|
| 126 |
-
|
| 127 |
-
if
|
| 128 |
-
return
|
| 129 |
|
| 130 |
if not qtext:
|
| 131 |
return None
|
| 132 |
|
| 133 |
-
|
|
|
|
| 134 |
best_score = 0.0
|
| 135 |
-
best_overlap = 0.0
|
| 136 |
-
best_choice = 0.0
|
| 137 |
-
|
| 138 |
for item in self._items:
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
query_choices=options_text,
|
| 142 |
-
candidate=item,
|
| 143 |
-
)
|
| 144 |
if score > best_score:
|
| 145 |
-
|
|
|
|
| 146 |
best_score = score
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
|
|
|
| 161 |
return None
|
| 162 |
|
| 163 |
def upsert(self, item: Dict[str, Any]) -> None:
|
|
@@ -169,4 +201,4 @@ class QuestionSupportBank:
|
|
| 169 |
return [dict(v) for v in self._items]
|
| 170 |
|
| 171 |
|
| 172 |
-
question_support_bank = QuestionSupportBank()
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
import json
|
| 4 |
import re
|
| 5 |
+
from difflib import SequenceMatcher
|
| 6 |
from pathlib import Path
|
| 7 |
+
from typing import Any, Dict, List, Optional
|
| 8 |
|
| 9 |
|
| 10 |
class QuestionSupportBank:
|
| 11 |
+
"""Load and retrieve authored question support entries with strong matching."""
|
| 12 |
+
|
| 13 |
def __init__(self, data_path: Optional[str] = None) -> None:
|
| 14 |
base_dir = Path(__file__).resolve().parent
|
| 15 |
self.data_path = Path(data_path) if data_path else base_dir / "data" / "question_support_bank.jsonl"
|
| 16 |
self._loaded = False
|
| 17 |
+
self._items: List[Dict[str, Any]] = []
|
| 18 |
self._by_id: Dict[str, Dict[str, Any]] = {}
|
| 19 |
self._by_text: Dict[str, Dict[str, Any]] = {}
|
| 20 |
self._by_signature: Dict[str, Dict[str, Any]] = {}
|
| 21 |
+
self._by_unordered_signature: Dict[str, Dict[str, Any]] = {}
|
| 22 |
|
| 23 |
def _normalize(self, text: Optional[str]) -> str:
|
| 24 |
cleaned = (text or "").strip().lower()
|
| 25 |
+
cleaned = cleaned.replace("’", "'").replace("“", '"').replace("”", '"')
|
| 26 |
+
cleaned = cleaned.replace("−", "-").replace("–", "-")
|
| 27 |
cleaned = re.sub(r"\s+", " ", cleaned)
|
| 28 |
+
cleaned = re.sub(r"\s*([=+\-*/:,;()])\s*", r"", cleaned)
|
| 29 |
+
return cleaned.strip()
|
| 30 |
|
| 31 |
def _tokenize(self, text: Optional[str]) -> List[str]:
|
| 32 |
+
return re.findall(r"[a-z0-9%/.]+", self._normalize(text))
|
| 33 |
|
| 34 |
def _normalize_choice(self, value: Any) -> str:
|
| 35 |
return self._normalize(str(value) if value is not None else "")
|
| 36 |
|
| 37 |
+
def _coerce_choices(self, choices: Optional[List[Any]]) -> List[str]:
|
| 38 |
+
if not choices:
|
| 39 |
+
return []
|
| 40 |
+
out: List[str] = []
|
| 41 |
+
for choice in choices:
|
| 42 |
+
normalized = self._normalize_choice(choice)
|
| 43 |
+
if normalized:
|
| 44 |
+
out.append(normalized)
|
| 45 |
+
return out
|
| 46 |
+
|
| 47 |
+
def _choice_signature(self, choices: Optional[List[Any]], *, ordered: bool = True) -> str:
|
| 48 |
+
cleaned = self._coerce_choices(choices)
|
| 49 |
+
if not ordered:
|
| 50 |
+
cleaned = sorted(cleaned)
|
| 51 |
return " || ".join(cleaned)
|
| 52 |
|
| 53 |
+
def _question_signature(self, question_text: Optional[str], choices: Optional[List[Any]] = None, *, ordered: bool = True) -> str:
|
| 54 |
q = self._normalize(question_text)
|
| 55 |
+
c = self._choice_signature(choices, ordered=ordered)
|
| 56 |
return f"{q} ## {c}" if c else q
|
| 57 |
|
| 58 |
def load(self) -> None:
|
| 59 |
+
self._items = []
|
| 60 |
self._by_id = {}
|
| 61 |
self._by_text = {}
|
| 62 |
self._by_signature = {}
|
| 63 |
+
self._by_unordered_signature = {}
|
| 64 |
|
| 65 |
if self.data_path.exists():
|
| 66 |
with self.data_path.open("r", encoding="utf-8") as handle:
|
|
|
|
| 83 |
def _store_item(self, item: Dict[str, Any]) -> None:
|
| 84 |
if not isinstance(item, dict):
|
| 85 |
return
|
|
|
|
| 86 |
stored = dict(item)
|
|
|
|
| 87 |
stem = stored.get("question_text") or stored.get("stem") or ""
|
| 88 |
choices = stored.get("options_text") or stored.get("choices") or []
|
| 89 |
+
qid = str(stored.get("question_id") or "").strip()
|
| 90 |
+
normalized_text = self._normalize(stem)
|
| 91 |
+
signature = self._question_signature(stem, choices, ordered=True)
|
| 92 |
+
unordered_signature = self._question_signature(stem, choices, ordered=False)
|
| 93 |
|
| 94 |
if qid:
|
| 95 |
self._by_id[qid] = stored
|
| 96 |
+
if normalized_text:
|
| 97 |
+
self._by_text[normalized_text] = stored
|
| 98 |
if signature:
|
| 99 |
self._by_signature[signature] = stored
|
| 100 |
+
if unordered_signature:
|
| 101 |
+
self._by_unordered_signature[unordered_signature] = stored
|
| 102 |
self._items.append(stored)
|
| 103 |
|
| 104 |
+
def _candidate_stats(self, *, query_text: str, query_choices: Optional[List[Any]], candidate: Dict[str, Any]) -> Dict[str, float]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
cand_text = candidate.get("question_text") or candidate.get("stem") or ""
|
| 106 |
cand_choices = candidate.get("options_text") or candidate.get("choices") or []
|
| 107 |
|
| 108 |
+
norm_query = self._normalize(query_text)
|
| 109 |
+
norm_cand = self._normalize(cand_text)
|
| 110 |
+
text_exact = 1.0 if norm_query and norm_query == norm_cand else 0.0
|
| 111 |
+
text_ratio = SequenceMatcher(None, norm_query, norm_cand).ratio() if norm_query and norm_cand else 0.0
|
| 112 |
+
|
| 113 |
q_tokens = set(self._tokenize(query_text))
|
| 114 |
c_tokens = set(self._tokenize(cand_text))
|
| 115 |
+
token_overlap = len(q_tokens & c_tokens) / max(len(q_tokens | c_tokens), 1) if q_tokens and c_tokens else 0.0
|
| 116 |
+
|
| 117 |
+
q_sig = self._choice_signature(query_choices, ordered=True)
|
| 118 |
+
c_sig = self._choice_signature(cand_choices, ordered=True)
|
| 119 |
+
q_unsig = self._choice_signature(query_choices, ordered=False)
|
| 120 |
+
c_unsig = self._choice_signature(cand_choices, ordered=False)
|
| 121 |
+
ordered_choice_match = 1.0 if q_sig and c_sig and q_sig == c_sig else 0.0
|
| 122 |
+
unordered_choice_match = 1.0 if q_unsig and c_unsig and q_unsig == c_unsig else 0.0
|
| 123 |
+
|
| 124 |
+
score = (
|
| 125 |
+
0.30 * text_exact
|
| 126 |
+
+ 0.28 * text_ratio
|
| 127 |
+
+ 0.22 * token_overlap
|
| 128 |
+
+ 0.12 * ordered_choice_match
|
| 129 |
+
+ 0.08 * unordered_choice_match
|
| 130 |
+
)
|
| 131 |
+
return {
|
| 132 |
+
"score": score,
|
| 133 |
+
"text_exact": text_exact,
|
| 134 |
+
"text_ratio": text_ratio,
|
| 135 |
+
"token_overlap": token_overlap,
|
| 136 |
+
"ordered_choice_match": ordered_choice_match,
|
| 137 |
+
"unordered_choice_match": unordered_choice_match,
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
def _annotate(self, item: Dict[str, Any], *, mode: str, stats: Optional[Dict[str, float]] = None) -> Dict[str, Any]:
|
| 141 |
+
out = dict(item)
|
| 142 |
+
out["support_match"] = {"mode": mode}
|
| 143 |
+
if stats:
|
| 144 |
+
out["support_match"].update({k: round(v, 4) for k, v in stats.items()})
|
| 145 |
+
return out
|
| 146 |
+
|
| 147 |
+
def get(self, question_id: Optional[str] = None, question_text: Optional[str] = None, options_text: Optional[List[Any]] = None) -> Optional[Dict[str, Any]]:
    """Find the stored support entry that best matches the query.

    Lookups are tried from most to least specific; the first hit is
    returned as an annotated copy whose ``support_match["mode"]`` names
    the route taken:

    1. ``question_id``: exact id;
    2. ``signature_exact``: normalized stem plus choices in order;
    3. ``text_exact``: normalized stem alone;
    4. ``signature_unordered``: normalized stem plus choices in any order;
    5. ``fuzzy``: highest-scoring item from ``self._candidate_stats``,
       accepted only when its score reaches a threshold that is relaxed
       as the individual signals get stronger.

    Returns:
        The annotated entry, or ``None`` when nothing matches well enough
        or no question text is available for fuzzy matching.
    """
    self._ensure_loaded()
    qid = str(question_id or "").strip()
    if qid and qid in self._by_id:
        return self._annotate(self._by_id[qid], mode="question_id")

    signature = self._question_signature(question_text, options_text, ordered=True)
    if signature and signature in self._by_signature:
        return self._annotate(self._by_signature[signature], mode="signature_exact")

    qtext = self._normalize(question_text)
    if qtext and qtext in self._by_text:
        return self._annotate(self._by_text[qtext], mode="text_exact")

    unordered_signature = self._question_signature(question_text, options_text, ordered=False)
    if unordered_signature and unordered_signature in self._by_unordered_signature:
        return self._annotate(self._by_unordered_signature[unordered_signature], mode="signature_unordered")

    if not qtext:
        return None

    best_item: Optional[Dict[str, Any]] = None
    best_stats: Optional[Dict[str, float]] = None
    best_score = 0.0
    for item in self._items:
        stats = self._candidate_stats(query_text=question_text or "", query_choices=options_text, candidate=item)
        score = stats["score"]
        if score > best_score:
            best_item = item
            best_stats = stats
            best_score = score

    if not best_item or not best_stats:
        return None

    # Matching choices are strong evidence, so they lower the bar; a very
    # close stem lowers it further.
    strong_choice = best_stats["ordered_choice_match"] >= 1.0 or best_stats["unordered_choice_match"] >= 1.0
    threshold = 0.70 if strong_choice else 0.82
    if best_stats["text_exact"] >= 1.0:
        threshold = min(threshold, 0.55)
    elif best_stats["text_ratio"] >= 0.94:
        threshold = min(threshold, 0.68)
    elif best_stats["token_overlap"] >= 0.75:
        threshold = min(threshold, 0.74)

    if best_score >= threshold:
        return self._annotate(best_item, mode="fuzzy", stats=best_stats)
    return None
|
| 194 |
|
| 195 |
def upsert(self, item: Dict[str, Any]) -> None:
|
|
|
|
| 201 |
return [dict(v) for v in self._items]
|
| 202 |
|
| 203 |
|
| 204 |
+
question_support_bank = QuestionSupportBank()
|
solver_router.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
from quant_solver import solve_quant
|
| 4 |
-
|
| 5 |
from solver_absolute_value import solve_absolute_value
|
| 6 |
from solver_algebra import solve_algebra
|
| 7 |
from solver_combinatorics import solve_combinatorics
|
|
@@ -19,21 +18,11 @@ from solver_work_rate import solve_work_rate
|
|
| 19 |
|
| 20 |
|
| 21 |
def route_solver(text: str):
|
| 22 |
-
"""
|
| 23 |
-
Route a question to the most appropriate dedicated solver.
|
| 24 |
-
|
| 25 |
-
Ordering matters:
|
| 26 |
-
- highly distinctive patterns first
|
| 27 |
-
- broad/greedy solvers later
|
| 28 |
-
- algebra and number properties relatively late to avoid stealing
|
| 29 |
-
percent / ratio / probability / rate questions
|
| 30 |
-
- final fallback goes to general quant solver
|
| 31 |
-
"""
|
| 32 |
if not text or not text.strip():
|
| 33 |
return solve_quant(text)
|
| 34 |
|
| 35 |
text = text.strip()
|
| 36 |
-
|
| 37 |
ordered_solvers = [
|
| 38 |
solve_standard_deviation,
|
| 39 |
solve_overlapping_sets,
|
|
@@ -43,22 +32,18 @@ def route_solver(text: str):
|
|
| 43 |
solve_distance_rate_time,
|
| 44 |
solve_percent,
|
| 45 |
solve_ratio,
|
|
|
|
| 46 |
solve_remainder,
|
| 47 |
solve_factorial,
|
| 48 |
solve_absolute_value,
|
| 49 |
solve_number_properties,
|
| 50 |
solve_algebra,
|
| 51 |
-
solve_probability
|
| 52 |
]
|
| 53 |
-
|
| 54 |
for solver in ordered_solvers:
|
| 55 |
try:
|
| 56 |
result = solver(text)
|
| 57 |
if result:
|
| 58 |
return result
|
| 59 |
except Exception:
|
| 60 |
-
# Fail open: if one specialized solver crashes,
|
| 61 |
-
# keep trying the rest instead of breaking the whole pipeline.
|
| 62 |
continue
|
| 63 |
-
|
| 64 |
-
return solve_quant(text)
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
from quant_solver import solve_quant
|
|
|
|
| 4 |
from solver_absolute_value import solve_absolute_value
|
| 5 |
from solver_algebra import solve_algebra
|
| 6 |
from solver_combinatorics import solve_combinatorics
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
def route_solver(text: str):
|
| 21 |
+
"""Route to the most specific solver first."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
if not text or not text.strip():
|
| 23 |
return solve_quant(text)
|
| 24 |
|
| 25 |
text = text.strip()
|
|
|
|
| 26 |
ordered_solvers = [
|
| 27 |
solve_standard_deviation,
|
| 28 |
solve_overlapping_sets,
|
|
|
|
| 32 |
solve_distance_rate_time,
|
| 33 |
solve_percent,
|
| 34 |
solve_ratio,
|
| 35 |
+
solve_probability,
|
| 36 |
solve_remainder,
|
| 37 |
solve_factorial,
|
| 38 |
solve_absolute_value,
|
| 39 |
solve_number_properties,
|
| 40 |
solve_algebra,
|
|
|
|
| 41 |
]
|
|
|
|
| 42 |
for solver in ordered_solvers:
|
| 43 |
try:
|
| 44 |
result = solver(text)
|
| 45 |
if result:
|
| 46 |
return result
|
| 47 |
except Exception:
|
|
|
|
|
|
|
| 48 |
continue
|
| 49 |
+
return solve_quant(text)
|
|
|