Spaces:

j-js
/

GameAI

Sleeping

App Files Community

j-js committed 23 days ago

Commit

ae81756

verified ·

1 Parent(s): ef50f17

Upload 5 files

Browse files

Files changed (5) hide show

conversation_logic.py +24 -207
formatting.py +75 -123
question_fallback_router.py +387 -364
question_support_loader.py +109 -77
solver_router.py +3 -18

conversation_logic.py CHANGED Viewed

@@ -1050,19 +1050,26 @@ def _should_try_solver(is_quant: bool, help_mode: str, solver_input: str) -> boo
     return help_mode in {"answer", "walkthrough", "instruction", "hint", "step_by_step"}
-def _should_prefer_question_support(help_mode: str, fallback_pack: Dict[str, Any]) -> bool:
     if not fallback_pack:
         return False
     support_source = str(fallback_pack.get("support_source", "")).strip().lower()
-    has_specific_content = support_source in {
-        "question_bank",
-        "question_bank_refined",
-        "question_id",
-        "question_text",
-        "generated_question_specific",
-    }
     if help_mode in {"hint", "walkthrough", "instruction", "step_by_step", "explain", "method"}:
-        return has_specific_content or bool(fallback_pack)
     return False
@@ -1124,208 +1131,17 @@ class ConversationEngine:
         question_topic = _normalize_classified_topic(classification.get("topic"), inferred_category, solver_input)
         resolved_intent = intent or detect_intent(user_text, help_mode)
-        if input_type in {"hint", "next_hint"}:
-            resolved_intent = "hint"
-        elif input_type == "confusion":
-            resolved_intent = "method"
-        elif input_type in {"solve", "question"} and resolved_intent in {"hint", "walkthrough", "step_by_step"}:
-            resolved_intent = "answer"
-        resolved_help_mode = help_mode or intent_to_help_mode(resolved_intent)
-        if input_type in {"hint", "next_hint"}:
-            resolved_help_mode = "hint"
-        elif input_type == "confusion":
-            resolved_help_mode = "explain"
-        elif resolved_help_mode == "step_by_step":
-            resolved_help_mode = "walkthrough"
-        prior_hint_stage = int(state.get("hint_stage", 0) or 0)
-        history_hint_stage = _history_hint_stage(chat_history)
-        hint_stage = _compute_hint_stage(input_type, prior_hint_stage, history_hint_stage)
-        is_quant = bool(solver_input) and (
-            inferred_category == "Quantitative" or is_quant_question(solver_input)
-        )
-        result = SolverResult(
-            domain="quant" if is_quant else "general",
-            solved=False,
-            help_mode=resolved_help_mode,
-            topic=question_topic if is_quant else "general",
-            used_retrieval=False,
-            used_generator=False,
-            steps=[],
-            teaching_chunks=[],
-            meta={},
-        )
-        solver_result: Optional[SolverResult] = None
-        if _should_try_solver(is_quant, resolved_help_mode, solver_input):
-            try:
-                solver_result = route_solver(solver_input)
-            except Exception:
-                solver_result = None
-            _apply_safe_step_sanitization(solver_result)
-        explainer_result = None
-        explainer_understood = False
-        explainer_scaffold: Dict[str, Any] = {}
-        if solver_input:
-            try:
-                explainer_result = route_explainer(solver_input)
-            except Exception:
-                explainer_result = None
-        if explainer_result is not None and getattr(explainer_result, "understood", False):
-            explainer_understood = True
-            explainer_scaffold = _extract_explainer_scaffold(explainer_result)
-        fallback_reply_core = ""
-        fallback_pack: Dict[str, Any] = {}
-        if solver_input:
-            fallback_reply_core, fallback_pack = _build_fallback_reply(
-                question_id=question_id,
-                question_text=solver_input,
-                options_text=options_text,
-                topic=question_topic,
-                category=inferred_category,
-                help_mode=resolved_help_mode,
-                hint_stage=hint_stage,
-                verbosity=verbosity,
-            )
-        question_specific_reply_core = _build_question_specific_reply(
-            question_text=solver_input,
-            options_text=options_text,
-            classified_topic=question_topic,
-            help_mode=resolved_help_mode,
-            input_type=input_type,
-            user_text=user_text,
-        )
-        if solver_result is not None:
-            result.meta = result.meta or {}
-            solver_topic = getattr(solver_result, "topic", None) or "unknown"
-            compatible_topics = {
-                question_topic,
-                "general_quant",
-                "general",
-                "unknown",
-            }
-            if question_topic == "algebra":
-                compatible_topics.update({"ratio"})
-            elif question_topic == "ratio":
-                compatible_topics.update({"algebra"})
-            elif question_topic == "percent":
-                compatible_topics.update({"ratio", "algebra"})
-            if solver_topic in compatible_topics:
-                result = solver_result
-                result.domain = "quant"
-                result.meta = result.meta or {}
-                result.topic = question_topic if question_topic else solver_topic
-                result.meta["solver_topic_accepted"] = solver_topic
-            else:
-                result.meta["solver_topic_rejected"] = solver_topic
-                result.meta["solver_topic_expected"] = question_topic
-                result.topic = question_topic if is_quant else result.topic
-        else:
-            result.meta = result.meta or {}
-            result.topic = question_topic if is_quant else result.topic
-        _apply_safe_step_sanitization(result)
-        solver_steps = _get_result_steps(result)
-        solver_has_steps = bool(solver_steps)
-        prefer_question_support = _should_prefer_question_support(resolved_help_mode, fallback_pack)
-        direct_solve_request = _is_direct_solve_request(user_text or solver_input, resolved_intent)
-        solver_topic_ok = result.meta.get("solver_topic_rejected") is None
-        result.help_mode = resolved_help_mode
-        result.meta = result.meta or {}
-        result.meta["hint_stage"] = hint_stage
-        result.meta["max_stage"] = 4
-        result.meta["recovered_question_text"] = solver_input
-        result.meta["question_id"] = question_id
-        result.meta["classified_topic"] = question_topic if question_topic else "general"
-        result.meta["explainer_understood"] = explainer_understood
-        result.meta["explainer_scaffold"] = explainer_scaffold
-        if input_type == "topic_query":
-            support_topic = fallback_pack.get("topic") if fallback_pack else ""
-            final_topic = _specific_topic_from_question(
-                solver_input,
-                support_topic,
-                question_topic if question_topic else "general",
-            ) or support_topic or question_topic or "general"
-            topic_reply_core = _build_topic_query_reply(
-                question_text=solver_input,
-                fallback_topic=support_topic,
-                classified_topic=question_topic if question_topic else "general",
-                category=inferred_category if inferred_category else "General",
-            )
-            reply = format_reply(
-                topic_reply_core,
-                tone=tone,
-                verbosity=verbosity,
-                transparency=transparency,
-                help_mode="answer",
-                hint_stage=hint_stage,
-                topic=final_topic,
-            )
-            result.topic = final_topic
-            result.reply = reply
-            result.help_mode = "answer"
-            result.meta["response_source"] = "topic_classifier"
-            result.meta["question_support_used"] = bool(fallback_pack)
-            result.meta["question_support_source"] = fallback_pack.get("support_source") if fallback_pack else None
-            result.meta["question_support_topic"] = support_topic or None
-            result.meta["help_mode"] = "answer"
-            result.meta["intent"] = "topic_query"
-            result.meta["question_text"] = solver_input or ""
-            result.meta["options_count"] = len(options_text or [])
-            result.meta["category"] = inferred_category if inferred_category else "General"
-            result.meta["user_last_input_type"] = input_type
-            result.meta["built_on_previous_turn"] = built_on_previous_turn
-            state = _update_session_state(
-                state,
-                question_text=solver_input,
-                question_id=question_id,
-                hint_stage=hint_stage,
-                user_last_input_type=input_type,
-                built_on_previous_turn=built_on_previous_turn,
-                help_mode="answer",
-                intent="topic_query",
-                topic=result.topic,
-                category=inferred_category,
-            )
-            result.meta["session_state"] = state
-            result.meta["used_retrieval"] = False
-            result.meta["used_generator"] = False
-            result.meta["can_reveal_answer"] = False
-            result.answer_letter = None
-            result.answer_value = None
-            result.internal_answer = None
-            result.meta["internal_answer"] = None
-            return result
-        if fallback_pack and fallback_pack.get("topic") == "statistics":
-            qlow = (solver_input or "").lower()
-            if any(k in qlow for k in ["variability", "spread", "standard deviation"]):
-                if resolved_help_mode == "answer":
-                    fallback_reply_core = (
-                        "- Notice this is asking about variability, which means spread, not the mean.\n"
-                        "- Compare how far the smallest and largest values sit from the middle value in each dataset.\n"
-                        "- The set with the widest spread has the greatest variability."
-                    )
         if input_type in {"hint", "next_hint"}:
             hint_lines: List[str] = []
             if fallback_pack:
                 fallback_hints = _safe_meta_list(fallback_pack.get("hint_ladder", []))
                 if fallback_hints:
                     idx = min(max(hint_stage - 1, 0), len(fallback_hints) - 1)
                     hint_lines = [fallback_hints[idx]]
             if not hint_lines:
                 custom_ladder = _question_specific_hint_ladder(
@@ -1336,12 +1152,13 @@ class ConversationEngine:
                 if custom_ladder:
                     idx = min(max(hint_stage - 1, 0), len(custom_ladder) - 1)
                     hint_lines = [custom_ladder[idx]]
             if not hint_lines and explainer_scaffold:
                 ladder = _safe_meta_list(explainer_scaffold.get("hint_ladder", []))
                 first_move = _safe_meta_text(explainer_scaffold.get("first_move"))
                 next_hint_text = _safe_meta_text(explainer_scaffold.get("next_hint"))
                 if hint_stage <= 1 and first_move:
                     hint_lines = [first_move]
                 elif ladder:
@@ -1360,7 +1177,7 @@ class ConversationEngine:
                 hint_lines = [_minimal_generic_reply(inferred_category)]
             reply_core = "\n".join(f"- {line}" for line in hint_lines if str(line).strip())
-            result.meta["response_source"] = "hint_ladder"
             result.meta["question_support_used"] = bool(fallback_pack)
             result.meta["question_support_source"] = fallback_pack.get("support_source") if fallback_pack else None
             result.meta["question_support_topic"] = fallback_pack.get("topic") if fallback_pack else None
@@ -1377,7 +1194,7 @@ class ConversationEngine:
         elif question_specific_reply_core and (
             input_type not in {"hint", "next_hint"}
-            and not (prefer_question_support and fallback_reply_core)
             and (
                 _is_help_first_mode(resolved_help_mode)
                 or input_type in {"other", "confusion"}
@@ -1488,7 +1305,7 @@ class ConversationEngine:
             )
         elif fallback_reply_core:
             reply_core = fallback_reply_core
-            result.meta["response_source"] = "fallback"
             result.meta["question_support_used"] = bool(fallback_pack)
             result.meta["question_support_source"] = fallback_pack.get("support_source")
             result.meta["question_support_topic"] = fallback_pack.get("topic")

     return help_mode in {"answer", "walkthrough", "instruction", "hint", "step_by_step"}
+def _support_pack_is_strong(fallback_pack: Dict[str, Any]) -> bool:
     if not fallback_pack:
         return False
     support_source = str(fallback_pack.get("support_source", "")).strip().lower()
+    support_match = fallback_pack.get("support_match") or {}
+    match_mode = str(support_match.get("mode", "")).strip().lower()
+    if support_source in {"question_bank", "question_bank_refined"}:
+        return True
+    if match_mode in {"question_id", "signature_exact", "text_exact", "signature_unordered", "fuzzy"}:
+        return True
+    if support_source == "generated_question_specific":
+        return bool(fallback_pack.get("topic") and _safe_meta_list(fallback_pack.get("hint_ladder", [])))
+    return bool(fallback_pack)
+def _should_prefer_question_support(help_mode: str, fallback_pack: Dict[str, Any]) -> bool:
+    if not fallback_pack:
+        return False
     if help_mode in {"hint", "walkthrough", "instruction", "step_by_step", "explain", "method"}:
+        return _support_pack_is_strong(fallback_pack)
     return False
         question_topic = _normalize_classified_topic(classification.get("topic"), inferred_category, solver_input)
         resolved_intent = intent or detect_intent(user_text, help_mode)
         if input_type in {"hint", "next_hint"}:
             hint_lines: List[str] = []
+            support_is_strong = _support_pack_is_strong(fallback_pack)
             if fallback_pack:
                 fallback_hints = _safe_meta_list(fallback_pack.get("hint_ladder", []))
                 if fallback_hints:
                     idx = min(max(hint_stage - 1, 0), len(fallback_hints) - 1)
                     hint_lines = [fallback_hints[idx]]
+                    if verbosity >= 0.62 and idx + 1 < len(fallback_hints):
+                        hint_lines.append(fallback_hints[idx + 1])
             if not hint_lines:
                 custom_ladder = _question_specific_hint_ladder(
                 if custom_ladder:
                     idx = min(max(hint_stage - 1, 0), len(custom_ladder) - 1)
                     hint_lines = [custom_ladder[idx]]
+                    if verbosity >= 0.62 and idx + 1 < len(custom_ladder):
+                        hint_lines.append(custom_ladder[idx + 1])
             if not hint_lines and explainer_scaffold:
                 ladder = _safe_meta_list(explainer_scaffold.get("hint_ladder", []))
                 first_move = _safe_meta_text(explainer_scaffold.get("first_move"))
                 next_hint_text = _safe_meta_text(explainer_scaffold.get("next_hint"))
                 if hint_stage <= 1 and first_move:
                     hint_lines = [first_move]
                 elif ladder:
                 hint_lines = [_minimal_generic_reply(inferred_category)]
             reply_core = "\n".join(f"- {line}" for line in hint_lines if str(line).strip())
+            result.meta["response_source"] = "hint_ladder" if support_is_strong else "hint_router"
             result.meta["question_support_used"] = bool(fallback_pack)
             result.meta["question_support_source"] = fallback_pack.get("support_source") if fallback_pack else None
             result.meta["question_support_topic"] = fallback_pack.get("topic") if fallback_pack else None
         elif question_specific_reply_core and (
             input_type not in {"hint", "next_hint"}
+            and not (_support_pack_is_strong(fallback_pack) and fallback_reply_core)
             and (
                 _is_help_first_mode(resolved_help_mode)
                 or input_type in {"other", "confusion"}
             )
         elif fallback_reply_core:
             reply_core = fallback_reply_core
+            result.meta["response_source"] = "question_support" if _support_pack_is_strong(fallback_pack) else "fallback"
             result.meta["question_support_used"] = bool(fallback_pack)
             result.meta["question_support_source"] = fallback_pack.get("support_source")
             result.meta["question_support_topic"] = fallback_pack.get("topic")

formatting.py CHANGED Viewed

@@ -19,143 +19,120 @@ def _clean_lines(core: str) -> List[str]:
     for line in (core or "").splitlines():
         cleaned = line.strip()
         if cleaned:
-            lines.append(cleaned)
     return lines
 def _normalize_key(text: str) -> str:
-    text = (text or "").strip().lower()
-    text = text.replace("’", "'")
-    text = re.sub(r"\s+", " ", text)
-    return text
 def _dedupe_lines(lines: List[str]) -> List[str]:
     seen = set()
-    output: List[str] = []
     for line in lines:
         key = _normalize_key(line)
         if key and key not in seen:
             seen.add(key)
-            output.append(line.strip())
-    return output
-def _strip_bullet_prefix(text: str) -> str:
-    return re.sub(r"^\s*[-•]\s*", "", (text or "").strip())
 def _normalize_display_lines(lines: List[str]) -> List[str]:
-    cleaned: List[str] = []
-    for line in lines:
-        item = _strip_bullet_prefix(line)
-        if item:
-            cleaned.append(item)
-    return cleaned
-def _limit_steps(steps: List[str], verbosity: float, minimum: int = 1) -> List[str]:
-    if not steps:
         return []
-    if verbosity < 0.2:
         limit = minimum
-    elif verbosity < 0.45:
-        limit = max(minimum, 2)
-    elif verbosity < 0.7:
-        limit = max(minimum, 3)
     else:
-        limit = max(minimum, 5)
-    return steps[:limit]
-def _extract_topic_from_text(text: str, fallback: Optional[str] = None) -> str:
-    low = (text or "").lower()
-    if fallback:
-        return fallback
-    if any(word in low for word in ["equation", "variable", "isolate", "algebra"]):
-        return "algebra"
-    if any(word in low for word in ["percent", "percentage", "%"]):
-        return "percent"
-    if any(word in low for word in ["ratio", "proportion"]):
-        return "ratio"
-    if any(word in low for word in ["probability", "outcome", "chance", "odds"]):
-        return "probability"
-    if any(word in low for word in ["mean", "median", "average", "data", "variance", "standard deviation"]):
-        return "statistics"
-    if any(word in low for word in ["triangle", "circle", "angle", "area", "perimeter", "circumference", "rectangle"]):
-        return "geometry"
-    if any(word in low for word in ["integer", "factor", "multiple", "prime", "remainder", "divisible"]):
-        return "number_theory"
-    return "general"
-def _why_line(topic: Optional[str]) -> str:
-    topic = (topic or "general").strip().lower()
     if topic == "algebra":
-        return "Why this helps: algebra becomes easier when you reverse operations in order and keep the variable isolated."
     if topic == "percent":
-        return "Why this helps: percent problems work best when you identify the base amount and apply each change to the correct value."
     if topic == "ratio":
-        return "Why this helps: ratios represent parts of a whole, so turning them into total parts keeps the set-up consistent."
     if topic == "probability":
-        return "Why this helps: probability is favorable outcomes over total outcomes, so clear counting matters first."
     if topic == "statistics":
-        return "Why this helps: statistics questions depend on choosing the right measure before you calculate anything."
     if topic == "geometry":
-        return "Why this helps: geometry usually becomes manageable once you identify the correct formula and substitute carefully."
-    if topic == "number_theory":
-        return "Why this helps: number theory questions usually depend on patterns, divisibility, or factor structure rather than brute force."
-    return "Why this helps: identifying the structure first makes the next step clearer and reduces avoidable mistakes."
 def _tone_rewrite(line: str, tone: float, position: int = 0) -> str:
     text = (line or "").strip()
     if not text:
         return text
-    if tone < 0.2:
         return text
-    if tone < 0.45:
-        return text[:1].upper() + text[1:] if text else text
-    if tone < 0.75:
         return f"Start here: {text[0].lower() + text[1:] if len(text) > 1 else text.lower()}" if position == 0 else text
-    return f"A good place to start is this: {text[0].lower() + text[1:] if len(text) > 1 else text.lower()}" if position == 0 else text
 def _transparency_expansion(line: str, topic: str, transparency: float, position: int = 0) -> str:
     text = (line or "").strip()
-    if not text:
-        return text
-    if transparency < 0.35:
         return text
     if transparency < 0.7:
         if position == 0:
             if topic == "algebra":
-                return f"{text} This helps isolate the variable."
             if topic == "percent":
-                return f"{text} This keeps track of the percent relationship correctly."
             if topic == "ratio":
-                return f"{text} This helps turn the ratio into usable parts."
             if topic == "probability":
-                return f"{text} This sets up favorable versus total outcomes."
         return text
     if position == 0:
         if topic == "algebra":
-            return f"{text} In algebra, the goal is to isolate the variable by reversing operations in the opposite order."
         if topic == "percent":
-            return f"{text} Percent questions are easiest when you identify the base amount and apply the change to the right quantity."
         if topic == "ratio":
-            return f"{text} Ratio questions become easier once you treat the ratio numbers as parts of one total."
         if topic == "probability":
-            return f"{text} Probability depends on counting the favorable cases and the total possible cases accurately."
         if topic == "statistics":
-            return f"{text} Statistics questions usually depend on choosing the correct measure before calculating."
         if topic == "geometry":
-            return f"{text} Geometry problems are usually solved by matching the shape to the correct formula first."
-        return f"{text} This works because it clarifies the structure before you calculate."
     return text
@@ -179,7 +156,6 @@ def format_reply(
 ) -> str:
     prefix = style_prefix(tone)
     core = (core or "").strip()
     if not core:
         return prefix or "Start with the structure of the problem."
@@ -189,77 +165,58 @@ def format_reply(
     resolved_topic = _extract_topic_from_text(core, topic)
     normalized_lines = _normalize_display_lines(lines)
     output: List[str] = []
     if prefix:
-        output.append(prefix)
-        output.append("")
     if help_mode == "hint":
         if verbosity < 0.25:
-            idx = max(0, min(int(hint_stage or 1) - 1, len(normalized_lines) - 1))
             shown = [normalized_lines[idx]]
-        elif verbosity < 0.6:
-            idx = max(0, min(int(hint_stage or 1) - 1, len(normalized_lines) - 1))
-            shown = normalized_lines[idx:idx + 2] or [normalized_lines[idx]]
         else:
-            shown = normalized_lines[: min(3, len(normalized_lines))]
         shown = _styled_lines(shown, tone, transparency, resolved_topic)
         output.append("Hint:")
-        for line in shown:
-            output.append(f"- {line}")
         if transparency >= 0.75:
-            output.append("")
-            output.append(_why_line(resolved_topic))
         return "\n".join(output).strip()
     if help_mode in {"walkthrough", "instruction", "step_by_step"}:
         shown = _limit_steps(normalized_lines, verbosity, minimum=2 if help_mode == "walkthrough" else 1)
         shown = _styled_lines(shown, tone, transparency, resolved_topic)
         output.append("Walkthrough:" if help_mode == "walkthrough" else "Step-by-step path:")
-        for line in shown:
-            output.append(f"- {line}")
         if transparency >= 0.7:
-            output.append("")
-            output.append(_why_line(resolved_topic))
         return "\n".join(output).strip()
     if help_mode in {"method", "explain", "concept", "definition"}:
         shown = _limit_steps(normalized_lines, verbosity, minimum=1)
         shown = _styled_lines(shown, tone, transparency, resolved_topic)
         output.append("Explanation:")
-        for line in shown:
-            output.append(f"- {line}")
         if transparency >= 0.6:
-            output.append("")
-            output.append(_why_line(resolved_topic))
         return "\n".join(output).strip()
     if help_mode == "answer":
         shown = _limit_steps(normalized_lines, verbosity, minimum=2)
-        shown = _styled_lines(shown, tone, transparency if verbosity >= 0.45 else min(transparency, 0.4), resolved_topic)
         output.append("Answer path:")
-        for line in shown:
-            output.append(f"- {line}")
         if transparency >= 0.75:
-            output.append("")
-            output.append(_why_line(resolved_topic))
         return "\n".join(output).strip()
     shown = _limit_steps(normalized_lines, verbosity, minimum=1)
     shown = _styled_lines(shown, tone, transparency, resolved_topic)
-    for line in shown:
-        output.append(f"- {line}")
     if transparency >= 0.8:
-        output.append("")
-        output.append(_why_line(resolved_topic))
     return "\n".join(output).strip()
@@ -273,23 +230,18 @@ def format_explainer_response(
 ) -> str:
     if not result:
         return "I can help explain what the question is asking, but I need the full wording of the question."
     summary = getattr(result, "summary", "") or ""
     teaching_points = getattr(result, "teaching_points", []) or []
     core_lines: List[str] = []
     if isinstance(summary, str) and summary.strip():
         core_lines.append(summary.strip())
     if isinstance(teaching_points, list):
         for item in teaching_points:
             text = str(item).strip()
             if text:
                 core_lines.append(text)
     if not core_lines:
         core_lines = ["Start by identifying what the question is asking."]
     return format_reply(
         core="\n".join(core_lines),
         tone=tone,
@@ -298,4 +250,4 @@ def format_explainer_response(
         help_mode=help_mode,
         hint_stage=hint_stage,
         topic=getattr(result, "topic", None),
-    )

     for line in (core or "").splitlines():
         cleaned = line.strip()
         if cleaned:
+            lines.append(cleaned.lstrip("- ").strip())
     return lines
 def _normalize_key(text: str) -> str:
+    return re.sub(r"\s+", " ", (text or "").strip().lower())
 def _dedupe_lines(lines: List[str]) -> List[str]:
     seen = set()
+    out: List[str] = []
     for line in lines:
         key = _normalize_key(line)
         if key and key not in seen:
             seen.add(key)
+            out.append(line.strip())
+    return out
+def _extract_topic_from_text(core: str, topic: Optional[str]) -> str:
+    if topic:
+        return str(topic).strip().lower()
+    text = (core or "").lower()
+    if "probability" in text or "favorable" in text or "sample space" in text:
+        return "probability"
+    if "percent" in text or "%" in text:
+        return "percent"
+    if "ratio" in text or "multiplier" in text:
+        return "ratio"
+    if "variable" in text or "equation" in text:
+        return "algebra"
+    if "variability" in text or "standard deviation" in text or "spread" in text:
+        return "statistics"
+    if "rectangle" in text or "perimeter" in text or "area" in text:
+        return "geometry"
+    return "general"
 def _normalize_display_lines(lines: List[str]) -> List[str]:
+    return [re.sub(r"\s+", " ", (line or "").strip()) for line in lines if str(line).strip()]
+def _limit_steps(lines: List[str], verbosity: float, minimum: int = 1) -> List[str]:
+    if not lines:
         return []
+    if verbosity < 0.22:
         limit = minimum
+    elif verbosity < 0.55:
+        limit = max(minimum, min(2, len(lines)))
+    elif verbosity < 0.82:
+        limit = max(minimum, min(4, len(lines)))
     else:
+        limit = len(lines)
+    return lines[:limit]
+def _why_line(topic: str) -> str:
     if topic == "algebra":
+        return "Why this helps: reversing operations in the right order keeps the equation equivalent while you isolate the variable."
     if topic == "percent":
+        return "Why this helps: percent questions usually break when the base quantity is chosen incorrectly."
     if topic == "ratio":
+        return "Why this helps: ratio numbers are usually parts, not the final quantities themselves."
     if topic == "probability":
+        return "Why this helps: the numerator and denominator must be counted under the same rules."
     if topic == "statistics":
+        return "Why this helps: statistics questions depend on choosing the right measure before calculating."
     if topic == "geometry":
+        return "Why this helps: matching the right formula to the shape simplifies the rest of the work."
+    return "Why this helps: getting the structure right first makes the next step clearer."
 def _tone_rewrite(line: str, tone: float, position: int = 0) -> str:
     text = (line or "").strip()
     if not text:
         return text
+    if tone < 0.25:
         return text
+    if tone < 0.55:
         return f"Start here: {text[0].lower() + text[1:] if len(text) > 1 else text.lower()}" if position == 0 else text
+    if tone < 0.8:
+        return f"A good place to start is this: {text[0].lower() + text[1:] if len(text) > 1 else text.lower()}" if position == 0 else text
+    return f"You can start with this: {text[0].lower() + text[1:] if len(text) > 1 else text.lower()}" if position == 0 else text
 def _transparency_expansion(line: str, topic: str, transparency: float, position: int = 0) -> str:
     text = (line or "").strip()
+    if not text or transparency < 0.35:
         return text
     if transparency < 0.7:
         if position == 0:
             if topic == "algebra":
+                return f"{text} This keeps the equation balanced while you isolate the variable."
             if topic == "percent":
+                return f"{text} This keeps the percent relationship tied to the correct base quantity."
             if topic == "ratio":
+                return f"{text} This turns the ratio into usable quantities instead of labels."
             if topic == "probability":
+                return f"{text} This separates successful outcomes from total outcomes."
         return text
     if position == 0:
         if topic == "algebra":
+            return f"{text} In algebra, each step should preserve an equivalent equation so the solution does not change while the variable is isolated."
         if topic == "percent":
+            return f"{text} Percent problems become clearer once the base quantity is fixed, because every percentage must refer back to some amount."
         if topic == "ratio":
+            return f"{text} Ratio numbers usually describe relative parts, so turning them into multiples of one common quantity is what makes the setup usable."
         if topic == "probability":
+            return f"{text} Probability depends on a consistent sample space, so the numerator and denominator must be counted under the same rules."
         if topic == "statistics":
+            return f"{text} Statistics questions often hinge on choosing the right measure first, because different measures capture different features of the data."
         if topic == "geometry":
+            return f"{text} Geometry problems often become routine once the correct formula is chosen, because the rest is usually substitution and algebra."
+        return f"{text} This makes the underlying structure explicit before you calculate."
     return text
 ) -> str:
     prefix = style_prefix(tone)
     core = (core or "").strip()
     if not core:
         return prefix or "Start with the structure of the problem."
     resolved_topic = _extract_topic_from_text(core, topic)
     normalized_lines = _normalize_display_lines(lines)
     output: List[str] = []
     if prefix:
+        output.extend([prefix, ""])
     if help_mode == "hint":
+        idx = max(0, min(int(hint_stage or 1) - 1, len(normalized_lines) - 1))
         if verbosity < 0.25:
             shown = [normalized_lines[idx]]
+        elif verbosity < 0.62:
+            shown = normalized_lines[idx: idx + 2] or [normalized_lines[idx]]
         else:
+            shown = normalized_lines[: min(4, len(normalized_lines))]
         shown = _styled_lines(shown, tone, transparency, resolved_topic)
         output.append("Hint:")
+        output.extend(f"- {line}" for line in shown)
         if transparency >= 0.75:
+            output.extend(["", _why_line(resolved_topic)])
         return "\n".join(output).strip()
     if help_mode in {"walkthrough", "instruction", "step_by_step"}:
         shown = _limit_steps(normalized_lines, verbosity, minimum=2 if help_mode == "walkthrough" else 1)
         shown = _styled_lines(shown, tone, transparency, resolved_topic)
         output.append("Walkthrough:" if help_mode == "walkthrough" else "Step-by-step path:")
+        output.extend(f"- {line}" for line in shown)
         if transparency >= 0.7:
+            output.extend(["", _why_line(resolved_topic)])
         return "\n".join(output).strip()
     if help_mode in {"method", "explain", "concept", "definition"}:
         shown = _limit_steps(normalized_lines, verbosity, minimum=1)
         shown = _styled_lines(shown, tone, transparency, resolved_topic)
         output.append("Explanation:")
+        output.extend(f"- {line}" for line in shown)
         if transparency >= 0.6:
+            output.extend(["", _why_line(resolved_topic)])
         return "\n".join(output).strip()
     if help_mode == "answer":
         shown = _limit_steps(normalized_lines, verbosity, minimum=2)
+        answer_transparency = transparency if verbosity >= 0.45 else min(transparency, 0.45)
+        shown = _styled_lines(shown, tone, answer_transparency, resolved_topic)
         output.append("Answer path:")
+        output.extend(f"- {line}" for line in shown)
         if transparency >= 0.75:
+            output.extend(["", _why_line(resolved_topic)])
         return "\n".join(output).strip()
     shown = _limit_steps(normalized_lines, verbosity, minimum=1)
     shown = _styled_lines(shown, tone, transparency, resolved_topic)
+    output.extend(f"- {line}" for line in shown)
     if transparency >= 0.8:
+        output.extend(["", _why_line(resolved_topic)])
     return "\n".join(output).strip()
 ) -> str:
     if not result:
         return "I can help explain what the question is asking, but I need the full wording of the question."
     summary = getattr(result, "summary", "") or ""
     teaching_points = getattr(result, "teaching_points", []) or []
     core_lines: List[str] = []
     if isinstance(summary, str) and summary.strip():
         core_lines.append(summary.strip())
     if isinstance(teaching_points, list):
         for item in teaching_points:
             text = str(item).strip()
             if text:
                 core_lines.append(text)
     if not core_lines:
         core_lines = ["Start by identifying what the question is asking."]
     return format_reply(
         core="\n".join(core_lines),
         tone=tone,
         help_mode=help_mode,
         hint_stage=hint_stage,
         topic=getattr(result, "topic", None),
+    )

question_fallback_router.py CHANGED Viewed

@@ -1,9 +1,25 @@
 import re
 from typing import Any, Dict, List, Optional
 from question_support_loader import question_support_bank
 class QuestionFallbackRouter:
     def _clean(self, text: Optional[str]) -> str:
         return (text or "").strip()
@@ -16,8 +32,7 @@ class QuestionFallbackRouter:
         if isinstance(value, tuple):
             return [str(v).strip() for v in value if str(v).strip()]
         if isinstance(value, str):
-            text = value.strip()
-            return [text] if text else []
         return []
     def _dedupe(self, items: List[str]) -> List[str]:
@@ -34,338 +49,381 @@ class QuestionFallbackRouter:
     def _normalize_topic(self, topic: Optional[str], question_text: str) -> str:
         q = (question_text or "").lower()
         t = (topic or "").strip().lower()
         if t and t not in {"general", "unknown", "general_quant", "quant"}:
             return t
-        if t == "quant":
-            t = ""
         if "%" in q or "percent" in q:
             return "percent"
-        if "ratio" in q or re.search(r"\b\d+\s*:\s*\d+\b", q):
-            return "ratio"
-        if any(k in q for k in ["probability", "odds", "chance", "random"]):
             return "probability"
-        if any(k in q for k in ["remainder", "factor", "multiple", "prime", "divisible"]):
             return "number_theory"
-        if any(k in q for k in ["triangle", "circle", "angle", "area", "perimeter", "rectangle", "circumference"]):
             return "geometry"
-        if any(k in q for k in ["mean", "median", "average", "standard deviation", "variability", "spread"]):
-            return "statistics"
-        if "=" in q or re.search(r"\b[xyzabn]\b", q):
             return "algebra"
         return "general"
-    def _preview_question(self, question_text: str) -> str:
-        cleaned = " ".join((question_text or "").split())
-        if len(cleaned) <= 120:
-            return cleaned
-        return cleaned[:117].rstrip() + "..."
     def _extract_equation(self, question_text: str) -> Optional[str]:
-        text = self._clean(question_text)
-        m = re.search(r"([^\?]*=[^\?]*)", text)
-        if m:
-            eq = self._clean(m.group(1))
-            return eq or None
-        return None
     def _extract_ratio(self, question_text: str) -> Optional[str]:
-        text = self._clean(question_text)
-        m = re.search(r"\b(\d+\s*:\s*\d+)\b", text)
-        if m:
-            return self._clean(m.group(1))
-        return None
-    def _extract_percent_values(self, question_text: str) -> List[str]:
-        return re.findall(r"\d+\.?\d*\s*%", question_text or "")
-    def _looks_like_linear_equation(self, question_text: str) -> bool:
-        q = self._clean(question_text)
-        low = q.lower()
-        return bool(
-            "=" in q
-            and re.search(r"\bwhat is\s+[a-z]\b", low)
-            and re.search(r"\d+[a-z]\b|\b[a-z]\b", q)
-        )
     def _pack_looks_generic(self, pack: Dict[str, Any], topic: str) -> bool:
         if not pack:
             return True
-        joined = " ".join(
-            [
-                self._clean(pack.get("first_step")),
-                self._clean(pack.get("hint_1")),
-                self._clean(pack.get("hint_2")),
-                self._clean(pack.get("hint_3")),
-                " ".join(self._listify(pack.get("walkthrough_steps"))),
-                " ".join(self._listify(pack.get("method_explanation"))),
-            ]
-        ).lower()
-        generic_signals = [
-            "write the equation clearly and identify the variable",
-            "undo operations in reverse order",
-            "keep both sides balanced",
-            "break the question into known and unknown parts",
-            "what is being asked?",
-            "what information is given?",
-            "translate words into math",
-        ]
-        if any(signal in joined for signal in generic_signals):
-            return True
-        if topic == "algebra" and "look at the structure" in joined:
             return True
-        return False
-    def _topic_defaults(self, topic: str, question_text: str, options_text: Optional[List[str]]) -> Dict[str, Any]:
-        preview = self._preview_question(question_text)
-        equation = self._extract_equation(question_text)
         ratio_text = self._extract_ratio(question_text)
-        percent_values = self._extract_percent_values(question_text)
-        has_options = bool(options_text)
-        generic = {
-            "first_step": f"Focus on what the question is really asking in: {preview}",
-            "hint_1": "Identify the exact quantity you need to find.",
-            "hint_2": "Translate the key relationship in the question into a usable setup.",
-            "hint_3": "Check each step against the wording before choosing an option.",
             "hint_ladder": [
-                "Identify the exact quantity you need to find.",
-                "Translate the key relationship in the question into a usable setup.",
-                "Check each step against the wording before choosing an option.",
             ],
             "walkthrough_steps": [
-                "Underline what is given and what must be found.",
-                "Set up the relationship before you calculate.",
-                "Work step by step and keep labels or units consistent.",
             ],
             "method_steps": [
-                "Start from the structure of the problem rather than jumping into arithmetic.",
-                "Use the wording to decide which relationship matters most.",
             ],
             "answer_path": [
-                "Set up the correct structure first.",
-                "Only then simplify or evaluate the result.",
             ],
-            "common_trap": "Rushing into calculation before setting up the relationship.",
         }
-        if topic == "algebra":
-            if self._looks_like_linear_equation(question_text):
-                generic.update(
-                    {
-                        "first_step": "Look at the variable side and identify the outermost operation attached to the variable.",
-                        "hint_1": "Undo the outside addition or subtraction on both sides before touching the coefficient.",
-                        "hint_2": "Once only the variable term remains, undo the multiplication or division.",
-                        "hint_3": "After isolating the variable, compare carefully with what the question actually asks for.",
-                        "hint_ladder": [
-                            "Look at the variable side and identify the outermost operation attached to the variable.",
-                            "Undo the outside addition or subtraction on both sides before touching the coefficient.",
-                            "Once only the variable term remains, undo the multiplication or division.",
-                        ],
-                        "walkthrough_steps": [
-                            "Write the equation cleanly and focus on the side containing the variable.",
-                            "Undo the constant attached to that side using the opposite operation on both sides.",
-                            "Then undo the coefficient attached to the variable.",
-                            "Only then compare the isolated value with the answer choices or requested expression.",
-                        ],
-                        "method_steps": [
-                            "Linear equations are solved by reversing operations in the opposite order from how they were built.",
-                            "Keep the equation balanced by doing the same thing to both sides each time.",
-                        ],
-                        "answer_path": [
-                            "Undo the constant on the variable side first.",
-                            "Then undo the coefficient attached to the variable.",
-                            "Check whether the question asks for the variable itself or for an expression built from it.",
-                        ],
-                        "common_trap": "Dividing by the coefficient too early before removing the constant term.",
-                    }
-                )
-            elif equation:
-                generic.update(
-                    {
-                        "first_step": f"Start from the equation {equation} and decide which operation should be reversed first.",
-                        "hint_1": "Preserve balance by doing the same operation to both sides.",
-                        "hint_2": "Reverse the operations in a sensible order instead of trying to simplify everything at once.",
-                        "hint_3": "Only evaluate the target expression after the variables are in a usable form.",
-                    }
-                )
-        elif topic == "percent":
-            first_step = "Identify the base quantity before doing any percent calculation."
-            if percent_values:
-                first_step = f"Track the percentage relationship carefully here: {' then '.join(percent_values[:2]) if len(percent_values) > 1 else percent_values[0]}"
-            if "increased by" in question_text.lower() and "decreased by" in question_text.lower():
-                generic.update(
-                    {
-                        "first_step": "Turn each percentage change into its own multiplier before combining anything.",
-                        "hint_1": "An increase and a decrease of the same percent do not cancel because they apply to different bases.",
-                        "hint_2": "Apply the first multiplier, then apply the second multiplier to the updated amount.",
-                        "hint_3": "Compare the final amount with the original amount only at the end.",
-                        "hint_ladder": [
-                            "Turn each percentage change into its own multiplier before combining anything.",
-                            "Apply the first multiplier, then apply the second multiplier to the updated amount.",
-                            "Compare the final amount with the original amount only at the end.",
-                        ],
-                    }
-                )
-            else:
-                generic.update(
-                    {
-                        "first_step": first_step,
-                        "hint_1": "Ask 'percent of what?' so you choose the correct base quantity.",
-                        "hint_2": "Rewrite the percent as a decimal or fraction if that makes the relationship clearer.",
-                        "hint_3": "Set up part = percent × base, or reverse that relationship if the base is unknown.",
-                        "hint_ladder": [
-                            "Ask 'percent of what?' so you choose the correct base quantity.",
-                            "Rewrite the percent as a decimal or fraction if that makes the relationship clearer.",
-                            "Set up part = percent × base, or reverse that relationship if the base is unknown.",
-                        ],
-                    }
-                )
-        elif topic == "ratio":
-            first_step = "Keep the ratio order consistent and assign one shared multiplier."
-            if ratio_text:
-                first_step = f"Use the ratio {ratio_text} as parts of one whole."
-            generic.update(
-                {
-                    "first_step": first_step,
-                    "hint_1": "Write each part of the ratio using the same multiplier.",
-                    "hint_2": "Use the total or known part to solve for that shared multiplier.",
-                    "hint_3": "Substitute back into the exact quantity the question asks for.",
-                    "hint_ladder": [
-                        "Write each part of the ratio using the same multiplier.",
-                        "Use the total or known part to solve for that shared multiplier.",
-                        "Substitute back into the exact quantity the question asks for.",
-                    ],
-                    "walkthrough_steps": [
-                        first_step,
-                        "Represent each ratio part in terms of one common variable such as k.",
-                        "Use the given total or condition to find k.",
-                        "Build the requested expression from those ratio parts.",
-                    ],
-                    "method_steps": [
-                        "Ratio problems usually become easier once you turn the ratio into matching parts.",
-                        "Avoid treating ratio numbers as the actual values unless the problem tells you they are.",
-                    ],
-                    "common_trap": "Using the raw ratio numbers as real values before solving for the common multiplier.",
-                }
-            )
-        elif topic == "probability":
-            generic.update(
-                {
-                    "first_step": "Decide what counts as a successful outcome before you count anything.",
-                    "hint_1": "Count the favorable outcomes that satisfy the condition.",
-                    "hint_2": "Count the total possible outcomes in the sample space.",
-                    "hint_3": "Build the probability as favorable over total, then simplify if needed.",
-                    "hint_ladder": [
-                        "Decide what counts as a successful outcome before you count anything.",
-                        "Count the favorable outcomes that satisfy the condition.",
-                        "Count the total possible outcomes in the sample space.",
-                    ],
-                    "walkthrough_steps": [
-                        "Define the event the question cares about.",
-                        "Count or construct the favorable cases.",
-                        "Count the total cases in the sample space.",
-                        "Write the probability as favorable over total.",
-                    ],
-                    "method_steps": [
-                        "Probability questions become clearer once the event and the sample space are both explicit.",
-                        "Many errors come from counting the wrong denominator, not the numerator.",
-                    ],
-                    "common_trap": "Changing the denominator incorrectly or forgetting which cases are actually favorable.",
-                }
-            )
-            if "at least" in question_text.lower():
-                generic["hint_2"] = "Check whether the complement is easier to count than the requested event."
-                generic["hint_ladder"] = [
-                    generic["hint_1"],
-                    "Check whether the complement is easier to count than the requested event.",
-                    generic["hint_3"],
-                ]
-        elif topic == "statistics":
-            qlow = question_text.lower()
-            if any(k in qlow for k in ["variability", "spread", "standard deviation"]):
-                generic.update(
-                    {
-                        "first_step": "Notice that this is about spread, not average.",
-                        "hint_1": "Use the middle value as a centre and compare how far the outer values sit from it.",
-                        "hint_2": "A set with values clustered tightly has lower variability than a set spread farther apart.",
-                        "hint_3": "Choose the set with the widest spread, not the largest mean.",
-                        "hint_ladder": [
-                            "Notice that this is about spread, not average.",
-                            "Use the middle value as a centre and compare how far the outer values sit from it.",
-                            "Choose the set with the widest spread, not the largest mean.",
-                        ],
-                    }
-                )
-            else:
-                generic.update(
-                    {
-                        "first_step": "Identify which statistical measure the question wants before calculating anything.",
-                        "hint_1": "Check whether the task is asking for mean, median, range, or another measure.",
-                        "hint_2": "Organise the data in a clean order if that helps reveal the measure.",
-                        "hint_3": "Use the exact definition of the requested measure rather than a nearby one.",
-                    }
-                )
-        if has_options:
-            generic["answer_path"] = list(generic.get("answer_path", [])) + [
-                "Use the answer choices to check which setup fits the question instead of guessing."
             ]
-        return generic
     def _merge_support_pack(self, generated: Dict[str, Any], stored: Optional[Dict[str, Any]], topic: str) -> Dict[str, Any]:
         if not stored:
-            merged = dict(generated)
-            merged["support_source"] = "generated_question_specific"
-            return merged
-        merged = dict(generated)
-        merged.update(dict(stored))
-        if self._pack_looks_generic(stored, topic):
-            for key in [
-                "first_step",
-                "hint_1",
-                "hint_2",
-                "hint_3",
-                "hint_ladder",
-                "walkthrough_steps",
-                "method_steps",
-                "answer_path",
-                "common_trap",
-            ]:
-                if key in generated:
-                    merged[key] = generated[key]
             merged["support_source"] = "question_bank_refined"
         else:
-            merged.setdefault("support_source", "question_bank")
-        merged.setdefault("method_steps", generated.get("method_steps", []))
-        merged.setdefault("answer_path", generated.get("answer_path", []))
         return merged
-    def get_support_pack(
-        self,
-        *,
-        question_id: Optional[str],
-        question_text: str,
-        options_text: Optional[List[str]],
-        topic: Optional[str],
-        category: Optional[str],
-    ) -> Dict[str, Any]:
         resolved_topic = self._normalize_topic(topic, question_text)
         generated = self._topic_defaults(resolved_topic, question_text, options_text)
-        stored = question_support_bank.get(
-            question_id=question_id,
-            question_text=question_text,
-            options_text=options_text,
-        )
         pack = self._merge_support_pack(generated, stored, resolved_topic)
         pack.setdefault("question_id", question_id)
         pack.setdefault("question_text", question_text)
@@ -377,24 +435,21 @@ class QuestionFallbackRouter:
         return pack
     def _hint_ladder_from_pack(self, pack: Dict[str, Any]) -> List[str]:
-        hints: List[str] = []
-        first_step = self._clean(pack.get("first_step"))
-        if first_step:
-            hints.append(first_step)
         for key in ("hint_1", "hint_2", "hint_3"):
             value = self._clean(pack.get(key))
             if value:
-                hints.append(value)
-        hints.extend(self._listify(pack.get("hint_ladder")))
-        hints.extend(self._listify(pack.get("hints")))
-        return self._dedupe(hints)
     def _walkthrough_from_pack(self, pack: Dict[str, Any]) -> List[str]:
-        lines: List[str] = []
-        first_step = self._clean(pack.get("first_step"))
-        if first_step:
-            lines.append(first_step)
-        lines.extend(self._listify(pack.get("walkthrough_steps")))
         return self._dedupe(lines)
     def _method_from_pack(self, pack: Dict[str, Any]) -> List[str]:
@@ -405,89 +460,57 @@ class QuestionFallbackRouter:
         lines.extend(self._listify(pack.get("method_steps")))
         lines.extend(self._listify(pack.get("method_explanation")))
         if not lines:
-            lines.extend(self._walkthrough_from_pack(pack)[:3])
         return self._dedupe(lines)
     def _answer_path_from_pack(self, pack: Dict[str, Any]) -> List[str]:
-        lines: List[str] = []
-        first_step = self._clean(pack.get("first_step"))
-        if first_step:
-            lines.append(first_step)
-        lines.extend(self._listify(pack.get("answer_path")))
         return self._dedupe(lines)
-    def _verbosity_limit(self, verbosity: float, low: int, mid: int, high: int) -> int:
-        if verbosity < 0.25:
             return low
-        if verbosity < 0.65:
             return mid
         return high
-    def build_response(
-        self,
-        *,
-        question_id: Optional[str],
-        question_text: str,
-        options_text: Optional[List[str]],
-        topic: Optional[str],
-        category: Optional[str],
-        help_mode: str,
-        hint_stage: int,
-        verbosity: float,
-    ) -> Dict[str, Any]:
-        pack = self.get_support_pack(
-            question_id=question_id,
-            question_text=question_text,
-            options_text=options_text,
-            topic=topic,
-            category=category,
-        )
         mode = (help_mode or "answer").lower()
-        stage = max(1, min(int(hint_stage or 1), 4))
-        first_step = self._clean(pack.get("first_step"))
         hint_ladder = self._hint_ladder_from_pack(pack)
         walkthrough_steps = self._walkthrough_from_pack(pack)
         method_steps = self._method_from_pack(pack)
         answer_path = self._answer_path_from_pack(pack)
         common_trap = self._clean(pack.get("common_trap"))
-        lines: List[str] = []
         if mode == "hint":
             if hint_ladder:
-                idx = min(stage - 1, len(hint_ladder) - 1)
-                selected = [hint_ladder[idx]]
-                if verbosity >= 0.7 and idx + 1 < len(hint_ladder):
-                    selected.append(hint_ladder[idx + 1])
             else:
-                selected = [first_step or "Start by identifying the structure of the question."]
-            if verbosity >= 0.75 and stage >= 3 and common_trap:
-                selected.append(f"Watch out for this trap: {common_trap}")
-            lines = self._dedupe(selected)
-        elif mode in {"walkthrough", "step_by_step", "instruction"}:
             source = walkthrough_steps or answer_path or hint_ladder
-            limit = self._verbosity_limit(verbosity, low=2, mid=4, high=6)
-            lines = source[:limit] if source else [first_step or "Start by setting up the problem."]
-            if verbosity >= 0.7 and common_trap:
                 lines = list(lines) + [f"Watch out for this trap: {common_trap}"]
         elif mode in {"method", "explain", "concept", "definition"}:
-            source = method_steps or walkthrough_steps or answer_path or hint_ladder
-            limit = self._verbosity_limit(verbosity, low=1, mid=2, high=4)
-            lines = source[:limit] if source else [first_step or "Start from the problem structure."]
-            if verbosity >= 0.65 and common_trap:
                 lines = list(lines) + [f"Common trap: {common_trap}"]
         else:
             source = answer_path or walkthrough_steps or hint_ladder
-            limit = self._verbosity_limit(verbosity, low=2, mid=3, high=5)
-            lines = source[:limit] if source else [first_step or "Start by identifying the relationship in the question."]
-        lines = self._dedupe(lines)
-        return {"lines": lines, "pack": pack}
-question_fallback_router = QuestionFallbackRouter()

+from __future__ import annotations
 import re
 from typing import Any, Dict, List, Optional
 from question_support_loader import question_support_bank
+GENERIC_MARKERS = {
+    "write the equation clearly and identify the variable.",
+    "undo operations in reverse order.",
+    "keep both sides balanced while isolating the variable.",
+    "understand the problem.",
+    "identify variables.",
+    "set up relationships.",
+    "solve step by step.",
+    "what is being asked?",
+    "what information is given?",
+    "how can you link them mathematically?",
+}
 class QuestionFallbackRouter:
     def _clean(self, text: Optional[str]) -> str:
         return (text or "").strip()
         if isinstance(value, tuple):
             return [str(v).strip() for v in value if str(v).strip()]
         if isinstance(value, str):
+            return [value.strip()] if value.strip() else []
         return []
     def _dedupe(self, items: List[str]) -> List[str]:
     def _normalize_topic(self, topic: Optional[str], question_text: str) -> str:
         q = (question_text or "").lower()
         t = (topic or "").strip().lower()
         if t and t not in {"general", "unknown", "general_quant", "quant"}:
             return t
         if "%" in q or "percent" in q:
             return "percent"
+        if "probability" in q or "chance" in q or "at random" in q or "odds" in q:
             return "probability"
+        if "ratio" in q or re.search(r"\d+\s*:\s*\d+", q):
+            return "ratio"
+        if any(k in q for k in ["mean", "median", "standard deviation", "variability", "spread"]):
+            return "statistics"
+        if any(k in q for k in ["remainder", "prime", "divisible", "factor", "multiple"]):
             return "number_theory"
+        if any(k in q for k in ["rectangle", "triangle", "circle", "perimeter", "area"]):
             return "geometry"
+        if "=" in q or re.search(r"[xyzabn]", q):
             return "algebra"
         return "general"
     def _extract_equation(self, question_text: str) -> Optional[str]:
+        m = re.search(r"([^?]*=[^?]*)", self._clean(question_text))
+        return self._clean(m.group(1)) if m else None
     def _extract_ratio(self, question_text: str) -> Optional[str]:
+        m = re.search(r"(\d+\s*:\s*\d+)", question_text or "")
+        return self._clean(m.group(1)) if m else None
+    def _looks_like_simple_linear(self, question_text: str) -> bool:
+        q = (question_text or "").lower()
+        return bool("=" in q and re.search(r"what is\s+[a-z]", q))
     def _pack_looks_generic(self, pack: Dict[str, Any], topic: str) -> bool:
         """Decide whether a support pack is too boilerplate to use as-is.

         Every text field of the pack is lower-cased and collected. A line is
         "meaningful" when it is not one of ``GENERIC_MARKERS`` and has at
         least five whitespace-separated words.

         Args:
             pack: Support pack to inspect; an empty or missing pack is generic.
             topic: Accepted for interface compatibility; not consulted here.

         Returns:
             ``True`` when the pack is empty, has no text lines, or has fewer
             than three meaningful lines. Packs whose ``support_match`` mode
             is "question_id", "signature_exact" or "text_exact" need only
             two meaningful lines to count as specific.
         """
         if not pack:
             return True

         lines: List[str] = []
         for key in ("first_step", "hint_1", "hint_2", "hint_3", "common_trap", "concept"):
             value = self._clean(pack.get(key))
             if value:
                 lines.append(value.lower())
         for key in ("hint_ladder", "walkthrough_steps", "method_steps", "method_explanation", "answer_path"):
             lines.extend(x.lower() for x in self._listify(pack.get(key)))
         if not lines:
             return True

         meaningful = sum(
             1
             for line in lines
             if line not in GENERIC_MARKERS and len(line.split()) >= 5
         )

         # dict.get's default only covers a missing key, so a stored
         # "support_match": None (or other non-dict value) must be guarded
         # before reading its mode.
         support_match = pack.get("support_match")
         match_mode = support_match.get("mode") if isinstance(support_match, dict) else None
         if match_mode in {"question_id", "signature_exact", "text_exact"} and meaningful >= 2:
             return False
         return meaningful < 3
+    def _algebra_pack(self, question_text: str) -> Dict[str, Any]:
+        eq = self._extract_equation(question_text) or "the equation"
+        if self._looks_like_simple_linear(question_text):
+            return {
+                "first_step": f"Start with {eq} and undo the outer operation around the variable first.",
+                "hint_1": "Move the constant term on the variable side by doing the opposite operation to both sides.",
+                "hint_2": "Once the variable term is isolated, undo the coefficient on the variable.",
+                "hint_3": "Check your value by substituting it back into the original equation.",
+                "hint_ladder": [
+                    f"Look at {eq} and ask which operation is happening to the variable last.",
+                    "Undo the constant attached to the variable side by using the opposite operation on both sides.",
+                    "After that, undo the multiplication or division on the variable itself.",
+                    "Substitute your candidate value back in to verify it reproduces the original right-hand side.",
+                ],
+                "walkthrough_steps": [
+                    f"Rewrite the equation cleanly: {eq}.",
+                    "Undo the addition or subtraction around the variable by applying the opposite operation to both sides.",
+                    "Then undo the multiplication or division on the variable.",
+                    "Check the result in the original equation, not just the simplified one.",
+                ],
+                "method_steps": [
+                    "Linear equations are usually solved by reversing operations in the opposite order from how they affect the variable.",
+                    "Keeping both sides balanced is what lets every step stay equivalent to the original equation.",
+                ],
+                "answer_path": [
+                    "Reverse the outer operation first.",
+                    "Then remove the coefficient from the variable.",
+                    "Verify by substitution.",
+                ],
+                "common_trap": "Undoing the coefficient before removing the constant on the variable side.",
+            }
+        return {
+            "first_step": f"Rewrite {eq} in a clean algebraic form before manipulating it.",
+            "hint_1": "Decide which quantity is unknown and which relationships are given.",
+            "hint_2": "Set up one equation at a time from those relationships.",
+            "hint_3": "Only simplify after the structure is correct.",
+            "walkthrough_steps": [
+                "Name the unknown quantity clearly.",
+                "Translate each condition into an equation or constraint.",
+                "Simplify the algebra only after the setup is correct.",
+            ],
+            "method_steps": [
+                "Algebra questions are easiest when you translate the wording into relationships before calculating.",
+            ],
+            "answer_path": [
+                "Identify the unknown.",
+                "Build the equation.",
+                "Isolate the target quantity.",
+            ],
+            "common_trap": "Starting calculations before defining the unknown or building the equation correctly.",
+        }
+    def _percent_pack(self, question_text: str) -> Dict[str, Any]:
+        q = question_text.lower()
+        if "increase" in q or "decrease" in q:
+            return {
+                "first_step": "Turn each percent change into its own multiplier before combining anything.",
+                "hint_1": "Use the original amount as a clean base, often 100, unless the question already gives a convenient number.",
+                "hint_2": "Apply the first percentage change to the current amount, not the final amount.",
+                "hint_3": "Apply the second change to the updated amount, then compare with the original only at the end.",
+                "hint_ladder": [
+                    "Treat a percent increase or decrease as multiplication, not simple adding or subtracting percentages.",
+                    "Apply the first multiplier to the starting amount.",
+                    "Apply the second multiplier to that new amount.",
+                    "Compare the final result with the original base only after both changes are done.",
+                ],
+                "walkthrough_steps": [
+                    "Choose an easy original value such as 100 if no starting number is given.",
+                    "Convert each percentage change into a multiplier.",
+                    "Apply the multipliers in sequence.",
+                    "Express the final amount relative to the original amount.",
+                ],
+                "method_steps": [
+                    "Successive percent changes are multiplicative because each new percent acts on the current amount.",
+                    "That is why equal increases and decreases do not cancel each other out.",
+                ],
+                "answer_path": [
+                    "Pick a clean base value.",
+                    "Apply each change in order.",
+                    "Compare final with original.",
+                ],
+                "common_trap": "Adding and subtracting the percentages directly instead of applying them sequentially.",
+            }
+        return {
+            "first_step": "Ask 'percent of what?' before writing any equation.",
+            "hint_1": "Separate the part, the percent, and the base quantity.",
+            "hint_2": "Write the relationship as part = percent × base, or reverse it if the base is unknown.",
+            "hint_3": "Only convert to a final percent form after the relationship is set up correctly.",
+            "walkthrough_steps": [
+                "Identify the base amount the percent is taken from.",
+                "Write the percent as a decimal or fraction.",
+                "Set up the percent relationship.",
+                "Solve for the requested quantity.",
+            ],
+            "method_steps": [
+                "Most percent errors come from choosing the wrong base quantity, not from arithmetic.",
+            ],
+            "answer_path": [
+                "Identify the base quantity.",
+                "Set up the percent relationship.",
+                "Solve for the target.",
+            ],
+            "common_trap": "Using the part as the base or applying the percent to the wrong quantity.",
+        }
+    def _ratio_pack(self, question_text: str) -> Dict[str, Any]:
         ratio_text = self._extract_ratio(question_text)
+        first = f"Treat {ratio_text} as matching parts of one whole." if ratio_text else "Treat the ratio numbers as parts, not final values."
+        return {
+            "first_step": first,
+            "hint_1": "Represent each ratio part using one shared multiplier such as k.",
+            "hint_2": "Use the given total or condition to find that shared multiplier.",
+            "hint_3": "Substitute back into the exact quantity the question asks for.",
             "hint_ladder": [
+                first,
+                "Write each quantity as a ratio part times the same multiplier.",
+                "Use the total or condition to solve for the multiplier.",
+                "Build the requested expression from the actual quantities, not the raw ratio numbers.",
             ],
             "walkthrough_steps": [
+                first,
+                "Assign variables to each ratio part using one multiplier.",
+                "Solve for the multiplier from the given condition.",
+                "Evaluate the requested quantity.",
             ],
             "method_steps": [
+                "Ratio questions simplify when you convert the ratio into actual quantities with one shared multiplier.",
             ],
             "answer_path": [
+                "Write each part with a common multiplier.",
+                "Solve for the multiplier.",
+                "Substitute into the target expression.",
             ],
+            "common_trap": "Using the raw ratio numbers as actual values instead of scaled parts.",
         }
+    def _probability_pack(self, question_text: str) -> Dict[str, Any]:
+        q = question_text.lower()
+        pack = {
+            "first_step": "Define exactly what counts as a successful outcome before you count anything.",
+            "hint_1": "Count the favorable outcomes that satisfy the condition.",
+            "hint_2": "Count the total possible outcomes in the sample space.",
+            "hint_3": "Write probability as favorable over total, then simplify only at the end.",
+            "hint_ladder": [
+                "State the event in plain language: what outcome are you trying to get?",
+                "Count the favorable cases for that event.",
+                "Count the total possible cases in the sample space.",
+                "Build the probability as favorable over total.",
+            ],
+            "walkthrough_steps": [
+                "Define the event the question cares about.",
+                "Count the favorable cases.",
+                "Count the total possible cases.",
+                "Write the probability as favorable divided by total.",
+            ],
+            "method_steps": [
+                "Probability becomes much easier once the event and sample space are both explicit.",
+                "Many mistakes come from counting the wrong denominator, not the numerator.",
+            ],
+            "answer_path": [
+                "Define the event.",
+                "Count favorable outcomes.",
+                "Count total outcomes.",
+            ],
+            "common_trap": "Using the wrong denominator or forgetting outcomes that belong in the sample space.",
+        }
+        if "at least" in q or "at most" in q:
+            pack["hint_ladder"] = [
+                "Check whether the complement is easier to count than the event you want.",
+                "Count the easier side first if that reduces the work.",
+                "Convert back to the requested event at the end.",
+                "Then write the probability with the correct denominator.",
             ]
+        return pack
+    def _statistics_pack(self, question_text: str) -> Dict[str, Any]:
+        q = question_text.lower()
+        if any(k in q for k in ["variability", "spread", "standard deviation"]):
+            return {
+                "first_step": "Notice that the question is about spread, not average.",
+                "hint_1": "Compare how far the values sit from the centre of each set.",
+                "hint_2": "A set with values clustered tightly has lower variability than a set spread farther apart.",
+                "hint_3": "Pick the set with the widest spread, not the highest mean.",
+                "hint_ladder": [
+                    "Ignore the mean at first and focus on how spread out the values are.",
+                    "Compare the distance of the outer values from the middle of each set.",
+                    "The set with the wider spread has greater variability.",
+                ],
+                "walkthrough_steps": [
+                    "Identify the centre of each set mentally or numerically.",
+                    "Compare how tightly the values cluster around that centre.",
+                    "Choose the set with the larger spread.",
+                ],
+                "method_steps": [
+                    "Variability measures spread, so a dataset can have the same mean as another and still be more variable.",
+                ],
+                "answer_path": [
+                    "Look at spread around the centre.",
+                    "Compare clustering versus spread.",
+                ],
+                "common_trap": "Choosing the set with the highest mean instead of the greatest spread.",
+            }
+        return {
+            "first_step": "Identify which statistical measure the question cares about before calculating.",
+            "hint_1": "Decide whether the task is about mean, median, range, spread, or another measure.",
+            "hint_2": "Organise the data cleanly if that makes the measure easier to see.",
+            "hint_3": "Use the exact definition of the requested measure.",
+            "walkthrough_steps": [
+                "Identify the requested statistic.",
+                "Organise the data.",
+                "Apply the definition of that statistic.",
+            ],
+            "method_steps": [
+                "Statistics questions are easiest once you know which measure matters.",
+            ],
+            "answer_path": [
+                "Identify the target statistic.",
+                "Apply its definition.",
+            ],
+            "common_trap": "Using a nearby but different statistical measure.",
+        }
+    def _geometry_pack(self, question_text: str) -> Dict[str, Any]:
+        q = question_text.lower()
+        if "perimeter" in q and "rectangle" in q:
+            return {
+                "first_step": "Start with the perimeter formula for a rectangle: 2L + 2W.",
+                "hint_1": "Substitute the known perimeter and known side length into the formula.",
+                "hint_2": "Isolate the remaining side length after substitution.",
+                "hint_3": "Check the width in the perimeter formula once more.",
+                "walkthrough_steps": [
+                    "Write the perimeter formula.",
+                    "Plug in the given perimeter and length.",
+                    "Solve for the width.",
+                ],
+                "method_steps": [
+                    "Geometry questions are often formula-matching questions first and algebra questions second.",
+                ],
+                "answer_path": [
+                    "Write the formula.",
+                    "Substitute given values.",
+                    "Solve for the missing side.",
+                ],
+                "common_trap": "Forgetting that perimeter includes both lengths and both widths.",
+            }
+        return {
+            "first_step": "Identify the shape and the formula that matches it.",
+            "hint_1": "Write the relevant geometry formula before substituting numbers.",
+            "hint_2": "Substitute carefully and keep track of what the question actually asks for.",
+            "hint_3": "Use algebra only after the correct formula is in place.",
+            "walkthrough_steps": [
+                "Identify the shape.",
+                "Choose the correct formula.",
+                "Substitute values and solve.",
+            ],
+            "method_steps": [
+                "Most geometry errors come from choosing the wrong formula or solving for the wrong quantity.",
+            ],
+            "answer_path": [
+                "Match the shape to its formula.",
+                "Substitute the known values.",
+                "Solve for the target quantity.",
+            ],
+            "common_trap": "Using the wrong formula or solving for the wrong dimension.",
+        }
+    def _topic_defaults(self, topic: str, question_text: str, options_text: Optional[List[str]]) -> Dict[str, Any]:
+        topic = self._normalize_topic(topic, question_text)
+        if topic == "algebra":
+            return self._algebra_pack(question_text)
+        if topic == "percent":
+            return self._percent_pack(question_text)
+        if topic == "ratio":
+            return self._ratio_pack(question_text)
+        if topic == "probability":
+            return self._probability_pack(question_text)
+        if topic == "statistics":
+            return self._statistics_pack(question_text)
+        if topic == "geometry":
+            return self._geometry_pack(question_text)
+        return {
+            "first_step": "Identify the exact relationship the question is testing before doing any arithmetic.",
+            "hint_1": "Separate what is given from what you need to find.",
+            "hint_2": "Build the relationship or formula that links those pieces.",
+            "hint_3": "Only calculate after the structure is correct.",
+            "walkthrough_steps": [
+                "State what is given and what is unknown.",
+                "Build the relationship between them.",
+                "Solve for the requested quantity.",
+            ],
+            "method_steps": [
+                "General quant questions become clearer when you translate the wording into a structure first.",
+            ],
+            "answer_path": [
+                "Identify the structure.",
+                "Set up the relationship.",
+                "Solve the target quantity.",
+            ],
+            "common_trap": "Starting arithmetic before the structure of the problem is clear.",
+        }
     def _merge_support_pack(self, generated: Dict[str, Any], stored: Optional[Dict[str, Any]], topic: str) -> Dict[str, Any]:
         if not stored:
+            out = dict(generated)
+            out["support_source"] = "generated_question_specific"
+            return out
+        merged = dict(stored)
+        looks_generic = self._pack_looks_generic(stored, topic)
+        if looks_generic:
+            for key, value in generated.items():
+                if value:
+                    merged[key] = value
             merged["support_source"] = "question_bank_refined"
         else:
+            for key, value in generated.items():
+                if key not in merged or not merged.get(key):
+                    merged[key] = value
+            merged["support_source"] = "question_bank"
         return merged
+    def get_support_pack(self, *, question_id: Optional[str], question_text: str, options_text: Optional[List[str]], topic: Optional[str], category: Optional[str]) -> Dict[str, Any]:
         resolved_topic = self._normalize_topic(topic, question_text)
         generated = self._topic_defaults(resolved_topic, question_text, options_text)
+        stored = question_support_bank.get(question_id=question_id, question_text=question_text, options_text=options_text)
         pack = self._merge_support_pack(generated, stored, resolved_topic)
         pack.setdefault("question_id", question_id)
         pack.setdefault("question_text", question_text)
         return pack
     def _hint_ladder_from_pack(self, pack: Dict[str, Any]) -> List[str]:
         """Collect every hint line in the pack, in escalation order, de-duplicated.

         Order: first step, the three numbered hints, then the "hint_ladder"
         and "hints" lists.
         """
         cleaned_fields = (
             self._clean(pack.get(field))
             for field in ("first_step", "hint_1", "hint_2", "hint_3")
         )
         ladder: List[str] = [text for text in cleaned_fields if text]
         ladder += self._listify(pack.get("hint_ladder"))
         ladder += self._listify(pack.get("hints"))
         return self._dedupe(ladder)
     def _walkthrough_from_pack(self, pack: Dict[str, Any]) -> List[str]:
         """Return the de-duplicated walkthrough steps, or just the first step if none exist."""
         steps = self._listify(pack.get("walkthrough_steps"))
         if not steps:
             opener = self._clean(pack.get("first_step"))
             if opener:
                 steps.append(opener)
         return self._dedupe(steps)
     def _method_from_pack(self, pack: Dict[str, Any]) -> List[str]:
         """Return the method/concept explanation lines for a pack.

         Combines "method_steps" and "method_explanation". When both are
         empty, the first two walkthrough steps are used instead. The result
         is de-duplicated.
         """
         # The accumulator has to be created here; without this line the
         # first extend() raises NameError.
         lines: List[str] = []
         lines.extend(self._listify(pack.get("method_steps")))
         lines.extend(self._listify(pack.get("method_explanation")))
         if not lines:
             lines.extend(self._walkthrough_from_pack(pack)[:2])
         return self._dedupe(lines)
     def _answer_path_from_pack(self, pack: Dict[str, Any]) -> List[str]:
         """Return the de-duplicated answer path, or the walkthrough steps if there is none."""
         path = self._listify(pack.get("answer_path")) or self._walkthrough_from_pack(pack)
         return self._dedupe(path)
+    def _verbosity_limit(self, verbosity: float, *, low: int, mid: int, high: int) -> int:
+        if verbosity < 0.28:
             return low
+        if verbosity < 0.68:
             return mid
         return high
+    def build_response(self, *, question_id: Optional[str], question_text: str, options_text: Optional[List[str]], topic: Optional[str], category: Optional[str], help_mode: str, hint_stage: int, verbosity: float) -> Dict[str, Any]:
+        pack = self.get_support_pack(question_id=question_id, question_text=question_text, options_text=options_text, topic=topic, category=category)
         mode = (help_mode or "answer").lower()
+        stage = max(1, int(hint_stage or 1))
         hint_ladder = self._hint_ladder_from_pack(pack)
         walkthrough_steps = self._walkthrough_from_pack(pack)
         method_steps = self._method_from_pack(pack)
         answer_path = self._answer_path_from_pack(pack)
         common_trap = self._clean(pack.get("common_trap"))
         if mode == "hint":
+            idx = min(stage - 1, max(len(hint_ladder) - 1, 0))
             if hint_ladder:
+                lines = [hint_ladder[idx]]
+                if verbosity >= 0.62 and idx + 1 < len(hint_ladder):
+                    lines.append(hint_ladder[idx + 1])
+                if verbosity >= 0.82 and stage >= 3 and common_trap:
+                    lines.append(f"Watch out for this trap: {common_trap}")
             else:
+                lines = [self._clean(pack.get("first_step")) or "Start with the structure of the problem."]
+        elif mode in {"walkthrough", "instruction", "step_by_step"}:
             source = walkthrough_steps or answer_path or hint_ladder
+            lines = source[: self._verbosity_limit(verbosity, low=2, mid=4, high=6)]
+            if verbosity >= 0.8 and common_trap:
                 lines = list(lines) + [f"Watch out for this trap: {common_trap}"]
         elif mode in {"method", "explain", "concept", "definition"}:
+            source = method_steps or walkthrough_steps or answer_path
+            lines = source[: self._verbosity_limit(verbosity, low=1, mid=2, high=4)]
+            if verbosity >= 0.72 and common_trap:
                 lines = list(lines) + [f"Common trap: {common_trap}"]
         else:
             source = answer_path or walkthrough_steps or hint_ladder
+            lines = source[: self._verbosity_limit(verbosity, low=2, mid=3, high=5)]
+        return {"lines": self._dedupe(lines), "pack": pack}
+question_fallback_router = QuestionFallbackRouter()

question_support_loader.py CHANGED Viewed

@@ -1,45 +1,66 @@
 import json
 import re
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
 class QuestionSupportBank:
     def __init__(self, data_path: Optional[str] = None) -> None:
         base_dir = Path(__file__).resolve().parent
         self.data_path = Path(data_path) if data_path else base_dir / "data" / "question_support_bank.jsonl"
         self._loaded = False
         self._by_id: Dict[str, Dict[str, Any]] = {}
         self._by_text: Dict[str, Dict[str, Any]] = {}
         self._by_signature: Dict[str, Dict[str, Any]] = {}
-        self._items: List[Dict[str, Any]] = []
     def _normalize(self, text: Optional[str]) -> str:
         cleaned = (text or "").strip().lower()
-        cleaned = cleaned.replace("’", "'")
         cleaned = re.sub(r"\s+", " ", cleaned)
-        return cleaned
     def _tokenize(self, text: Optional[str]) -> List[str]:
-        return re.findall(r"[a-z0-9%/]+", self._normalize(text))
     def _normalize_choice(self, value: Any) -> str:
         return self._normalize(str(value) if value is not None else "")
-    def _choice_signature(self, choices: Optional[List[Any]]) -> str:
-        cleaned = [self._normalize_choice(choice) for choice in (choices or []) if self._normalize_choice(choice)]
         return " || ".join(cleaned)
-    def _question_signature(self, question_text: Optional[str], choices: Optional[List[Any]] = None) -> str:
         q = self._normalize(question_text)
-        c = self._choice_signature(choices)
         return f"{q} ## {c}" if c else q
     def load(self) -> None:
         self._by_id = {}
         self._by_text = {}
         self._by_signature = {}
-        self._items = []
         if self.data_path.exists():
             with self.data_path.open("r", encoding="utf-8") as handle:
@@ -62,102 +83,113 @@ class QuestionSupportBank:
     def _store_item(self, item: Dict[str, Any]) -> None:
         if not isinstance(item, dict):
             return
         stored = dict(item)
-        qid = str(stored.get("question_id") or "").strip()
         stem = stored.get("question_text") or stored.get("stem") or ""
         choices = stored.get("options_text") or stored.get("choices") or []
-        qtext = self._normalize(stem)
-        signature = self._question_signature(stem, choices)
         if qid:
             self._by_id[qid] = stored
-        if qtext:
-            self._by_text[qtext] = stored
         if signature:
             self._by_signature[signature] = stored
         self._items.append(stored)
-    def _score_candidate(
-        self,
-        *,
-        query_text: str,
-        query_choices: Optional[List[Any]],
-        candidate: Dict[str, Any],
-    ) -> Tuple[float, float, float]:
         cand_text = candidate.get("question_text") or candidate.get("stem") or ""
         cand_choices = candidate.get("options_text") or candidate.get("choices") or []
         q_tokens = set(self._tokenize(query_text))
         c_tokens = set(self._tokenize(cand_text))
-        if not q_tokens or not c_tokens:
-            token_overlap = 0.0
-        else:
-            token_overlap = len(q_tokens & c_tokens) / max(len(q_tokens | c_tokens), 1)
-        q_choice_sig = self._choice_signature(query_choices)
-        c_choice_sig = self._choice_signature(cand_choices)
-        if q_choice_sig and c_choice_sig:
-            choice_match = 1.0 if q_choice_sig == c_choice_sig else 0.0
-        else:
-            choice_match = 0.0
-        exact_text = 1.0 if self._normalize(query_text) == self._normalize(cand_text) else 0.0
-        score = (0.55 * token_overlap) + (0.35 * choice_match) + (0.10 * exact_text)
-        return score, token_overlap, choice_match
-    def get(
-        self,
-        question_id: Optional[str] = None,
-        question_text: Optional[str] = None,
-        options_text: Optional[List[Any]] = None,
-    ) -> Optional[Dict[str, Any]]:
         self._ensure_loaded()
         qid = str(question_id or "").strip()
         if qid and qid in self._by_id:
-            return dict(self._by_id[qid])
         qtext = self._normalize(question_text)
         if qtext and qtext in self._by_text:
-            return dict(self._by_text[qtext])
-        signature = self._question_signature(question_text, options_text)
-        if signature and signature in self._by_signature:
-            return dict(self._by_signature[signature])
         if not qtext:
             return None
-        best: Optional[Dict[str, Any]] = None
         best_score = 0.0
-        best_overlap = 0.0
-        best_choice = 0.0
         for item in self._items:
-            score, token_overlap, choice_match = self._score_candidate(
-                query_text=question_text or "",
-                query_choices=options_text,
-                candidate=item,
-            )
             if score > best_score:
-                best = item
                 best_score = score
-                best_overlap = token_overlap
-                best_choice = choice_match
-        threshold = 0.84 if options_text else 0.92
-        if best is not None and (best_score >= threshold or (best_choice >= 1.0 and best_overlap >= 0.55)):
-            out = dict(best)
-            out.setdefault("support_match", {})
-            out["support_match"] = {
-                "mode": "fuzzy",
-                "score": round(best_score, 4),
-                "token_overlap": round(best_overlap, 4),
-                "choice_match": round(best_choice, 4),
-            }
-            return out
         return None
     def upsert(self, item: Dict[str, Any]) -> None:
@@ -169,4 +201,4 @@ class QuestionSupportBank:
         return [dict(v) for v in self._items]
-question_support_bank = QuestionSupportBank()

+from __future__ import annotations
 import json
 import re
+from difflib import SequenceMatcher
 from pathlib import Path
+from typing import Any, Dict, List, Optional
 class QuestionSupportBank:
+    """Load and retrieve authored question support entries with strong matching."""
     def __init__(self, data_path: Optional[str] = None) -> None:
         base_dir = Path(__file__).resolve().parent
         self.data_path = Path(data_path) if data_path else base_dir / "data" / "question_support_bank.jsonl"
         self._loaded = False
+        self._items: List[Dict[str, Any]] = []
         self._by_id: Dict[str, Dict[str, Any]] = {}
         self._by_text: Dict[str, Dict[str, Any]] = {}
         self._by_signature: Dict[str, Dict[str, Any]] = {}
+        self._by_unordered_signature: Dict[str, Dict[str, Any]] = {}
     def _normalize(self, text: Optional[str]) -> str:
         cleaned = (text or "").strip().lower()
+        cleaned = cleaned.replace("’", "'").replace("“", '"').replace("”", '"')
+        cleaned = cleaned.replace("−", "-").replace("–", "-")
         cleaned = re.sub(r"\s+", " ", cleaned)
+        cleaned = re.sub(r"\s*([=+\-*/:,;()])\s*", r"", cleaned)
+        return cleaned.strip()
     def _tokenize(self, text: Optional[str]) -> List[str]:
+        return re.findall(r"[a-z0-9%/.]+", self._normalize(text))
     def _normalize_choice(self, value: Any) -> str:
         return self._normalize(str(value) if value is not None else "")
+    def _coerce_choices(self, choices: Optional[List[Any]]) -> List[str]:
+        if not choices:
+            return []
+        out: List[str] = []
+        for choice in choices:
+            normalized = self._normalize_choice(choice)
+            if normalized:
+                out.append(normalized)
+        return out
+    def _choice_signature(self, choices: Optional[List[Any]], *, ordered: bool = True) -> str:
+        cleaned = self._coerce_choices(choices)
+        if not ordered:
+            cleaned = sorted(cleaned)
         return " || ".join(cleaned)
+    def _question_signature(self, question_text: Optional[str], choices: Optional[List[Any]] = None, *, ordered: bool = True) -> str:
         q = self._normalize(question_text)
+        c = self._choice_signature(choices, ordered=ordered)
         return f"{q} ## {c}" if c else q
     def load(self) -> None:
+        self._items = []
         self._by_id = {}
         self._by_text = {}
         self._by_signature = {}
+        self._by_unordered_signature = {}
         if self.data_path.exists():
             with self.data_path.open("r", encoding="utf-8") as handle:
     def _store_item(self, item: Dict[str, Any]) -> None:
         if not isinstance(item, dict):
             return
         stored = dict(item)
         stem = stored.get("question_text") or stored.get("stem") or ""
         choices = stored.get("options_text") or stored.get("choices") or []
+        qid = str(stored.get("question_id") or "").strip()
+        normalized_text = self._normalize(stem)
+        signature = self._question_signature(stem, choices, ordered=True)
+        unordered_signature = self._question_signature(stem, choices, ordered=False)
         if qid:
             self._by_id[qid] = stored
+        if normalized_text:
+            self._by_text[normalized_text] = stored
         if signature:
             self._by_signature[signature] = stored
+        if unordered_signature:
+            self._by_unordered_signature[unordered_signature] = stored
         self._items.append(stored)
+    def _candidate_stats(self, *, query_text: str, query_choices: Optional[List[Any]], candidate: Dict[str, Any]) -> Dict[str, float]:
         cand_text = candidate.get("question_text") or candidate.get("stem") or ""
         cand_choices = candidate.get("options_text") or candidate.get("choices") or []
+        norm_query = self._normalize(query_text)
+        norm_cand = self._normalize(cand_text)
+        text_exact = 1.0 if norm_query and norm_query == norm_cand else 0.0
+        text_ratio = SequenceMatcher(None, norm_query, norm_cand).ratio() if norm_query and norm_cand else 0.0
         q_tokens = set(self._tokenize(query_text))
         c_tokens = set(self._tokenize(cand_text))
+        token_overlap = len(q_tokens & c_tokens) / max(len(q_tokens | c_tokens), 1) if q_tokens and c_tokens else 0.0
+        q_sig = self._choice_signature(query_choices, ordered=True)
+        c_sig = self._choice_signature(cand_choices, ordered=True)
+        q_unsig = self._choice_signature(query_choices, ordered=False)
+        c_unsig = self._choice_signature(cand_choices, ordered=False)
+        ordered_choice_match = 1.0 if q_sig and c_sig and q_sig == c_sig else 0.0
+        unordered_choice_match = 1.0 if q_unsig and c_unsig and q_unsig == c_unsig else 0.0
+        score = (
+            0.30 * text_exact
+            + 0.28 * text_ratio
+            + 0.22 * token_overlap
+            + 0.12 * ordered_choice_match
+            + 0.08 * unordered_choice_match
+        )
+        return {
+            "score": score,
+            "text_exact": text_exact,
+            "text_ratio": text_ratio,
+            "token_overlap": token_overlap,
+            "ordered_choice_match": ordered_choice_match,
+            "unordered_choice_match": unordered_choice_match,
+        }
+    def _annotate(self, item: Dict[str, Any], *, mode: str, stats: Optional[Dict[str, float]] = None) -> Dict[str, Any]:
+        out = dict(item)
+        out["support_match"] = {"mode": mode}
+        if stats:
+            out["support_match"].update({k: round(v, 4) for k, v in stats.items()})
+        return out
+    def get(self, question_id: Optional[str] = None, question_text: Optional[str] = None, options_text: Optional[List[Any]] = None) -> Optional[Dict[str, Any]]:
         self._ensure_loaded()
         qid = str(question_id or "").strip()
         if qid and qid in self._by_id:
+            return self._annotate(self._by_id[qid], mode="question_id")
+        signature = self._question_signature(question_text, options_text, ordered=True)
+        if signature and signature in self._by_signature:
+            return self._annotate(self._by_signature[signature], mode="signature_exact")
         qtext = self._normalize(question_text)
         if qtext and qtext in self._by_text:
+            return self._annotate(self._by_text[qtext], mode="text_exact")
+        unordered_signature = self._question_signature(question_text, options_text, ordered=False)
+        if unordered_signature and unordered_signature in self._by_unordered_signature:
+            return self._annotate(self._by_unordered_signature[unordered_signature], mode="signature_unordered")
         if not qtext:
             return None
+        best_item: Optional[Dict[str, Any]] = None
+        best_stats: Optional[Dict[str, float]] = None
         best_score = 0.0
         for item in self._items:
+            stats = self._candidate_stats(query_text=question_text or "", query_choices=options_text, candidate=item)
+            score = stats["score"]
             if score > best_score:
+                best_item = item
+                best_stats = stats
                 best_score = score
+        if not best_item or not best_stats:
+            return None
+        strong_choice = best_stats["ordered_choice_match"] >= 1.0 or best_stats["unordered_choice_match"] >= 1.0
+        threshold = 0.70 if strong_choice else 0.82
+        if best_stats["text_exact"] >= 1.0:
+            threshold = min(threshold, 0.55)
+        elif best_stats["text_ratio"] >= 0.94:
+            threshold = min(threshold, 0.68)
+        elif best_stats["token_overlap"] >= 0.75:
+            threshold = min(threshold, 0.74)
+        if best_score >= threshold:
+            return self._annotate(best_item, mode="fuzzy", stats=best_stats)
         return None
     def upsert(self, item: Dict[str, Any]) -> None:
         return [dict(v) for v in self._items]
+question_support_bank = QuestionSupportBank()

solver_router.py CHANGED Viewed

@@ -1,7 +1,6 @@
 from __future__ import annotations
 from quant_solver import solve_quant
 from solver_absolute_value import solve_absolute_value
 from solver_algebra import solve_algebra
 from solver_combinatorics import solve_combinatorics
@@ -19,21 +18,11 @@ from solver_work_rate import solve_work_rate
 def route_solver(text: str):
-    """
-    Route a question to the most appropriate dedicated solver.
-    Ordering matters:
-    - highly distinctive patterns first
-    - broad/greedy solvers later
-    - algebra and number properties relatively late to avoid stealing
-      percent / ratio / probability / rate questions
-    - final fallback goes to general quant solver
-    """
     if not text or not text.strip():
         return solve_quant(text)
     text = text.strip()
     ordered_solvers = [
         solve_standard_deviation,
         solve_overlapping_sets,
@@ -43,22 +32,18 @@ def route_solver(text: str):
         solve_distance_rate_time,
         solve_percent,
         solve_ratio,
         solve_remainder,
         solve_factorial,
         solve_absolute_value,
         solve_number_properties,
         solve_algebra,
-        solve_probability
     ]
     for solver in ordered_solvers:
         try:
             result = solver(text)
             if result:
                 return result
         except Exception:
-            # Fail open: if one specialized solver crashes,
-            # keep trying the rest instead of breaking the whole pipeline.
             continue
-    return solve_quant(text)

 from __future__ import annotations
 from quant_solver import solve_quant
 from solver_absolute_value import solve_absolute_value
 from solver_algebra import solve_algebra
 from solver_combinatorics import solve_combinatorics
 def route_solver(text: str):
     """Route a question to the first dedicated solver that returns a result.

     Solvers are tried in the order they appear in ``ordered_solvers``; the
     first truthy result wins, so the position of a solver in the list decides
     which one gets to claim an ambiguous question.

     Args:
         text: Raw question text. ``None``, empty, or whitespace-only input is
             handed to ``solve_quant`` unchanged.

     Returns:
         The first truthy value returned by a dedicated solver, otherwise
         whatever ``solve_quant`` returns for the stripped text.
     """
     # Function-scope import keeps this block self-contained.
     import logging

     stripped = text.strip() if text else ""
     if not stripped:
         # Nothing to route: pass the original value straight to the general solver.
         return solve_quant(text)
     ordered_solvers = [
         solve_standard_deviation,
         solve_overlapping_sets,
         solve_distance_rate_time,
         solve_percent,
         solve_ratio,
         solve_probability,
         solve_remainder,
         solve_factorial,
         solve_absolute_value,
         solve_number_properties,
         solve_algebra,
     ]
     logger = logging.getLogger(__name__)
     for solver in ordered_solvers:
         try:
             result = solver(stripped)
         except Exception:
             # Fail open: a crashing solver must not break the pipeline, but the
             # failure is logged so it can be told apart from "did not match".
             logger.warning(
                 "Solver %s raised; trying the next solver",
                 getattr(solver, "__name__", repr(solver)),
                 exc_info=True,
             )
             continue
         if result:
             return result
     # No dedicated solver produced a result: use the general quant solver.
     return solve_quant(stripped)