"""Helpers for a tutoring conversation engine.

This part of the module holds the control-phrase tables plus the text
utilities that clean user input, decide whether a message looks like a real
question, and answer "what topic is this?" style queries.
"""
from __future__ import annotations

import re
from typing import Any, Dict, List, Optional, Tuple

from context_parser import detect_intent, intent_to_help_mode
from formatting import format_explainer_response, format_reply
from generator_engine import GeneratorEngine
from models import RetrievedChunk, SolverResult
from quant_solver import is_quant_question
from question_classifier import classify_question, normalize_category
from question_fallback_router import question_fallback_router
from retrieval_engine import RetrievalEngine
from solver_router import route_solver
from explainers.explainer_router import route_explainer


# Regexes that mark a message as "just give me the answer".
DIRECT_SOLVE_PATTERNS = [
    r"\bsolve\b",
    r"\bwhat is\b",
    r"\bfind\b",
    r"\bgive (?:me )?the answer\b",
    r"\bjust the answer\b",
    r"\banswer only\b",
    r"\bcalculate\b",
]

# Leading control phrases ("solve:", "hint:", a bare "continue", ...) that are
# removed before the remaining text is treated as the question itself.
CONTROL_PREFIX_PATTERNS = [
    r"^\s*solve\s*:\s*",
    r"^\s*solve\s+",
    r"^\s*question\s*:\s*",
    r"^\s*q\s*:\s*",
    r"^\s*hint\s*:\s*",
    r"^\s*hint\s*$",
    r"^\s*next hint\s*:\s*",
    r"^\s*next hint\s*$",
    r"^\s*another hint\s*:\s*",
    r"^\s*another hint\s*$",
    r"^\s*walkthrough\s*:\s*",
    r"^\s*walkthrough\s*$",
    r"^\s*step by step\s*:\s*",
    r"^\s*step by step\s*$",
    r"^\s*explain\s*:\s*",
    r"^\s*explain\s*$",
    r"^\s*method\s*:\s*",
    r"^\s*method\s*$",
    r"^\s*continue\s*$",
    r"^\s*go on\s*$",
    r"^\s*next step\s*$",
]

# Whole messages (lower-cased, stripped) that only ask for more help and carry
# no question text of their own.
FOLLOWUP_ONLY_INPUTS = {
    "hint",
    "a hint",
    "give me a hint",
    "can i have a hint",
    "next hint",
    "another hint",
    "more hint",
    "more hints",
    "second hint",
    "third hint",
    "second next hint",
    "what do i do first",
    "what should i do first",
    "what do i do next",
    "what should i do next",
    "first step",
    "next step",
    "continue",
    "go on",
    "walk me through it",
    "step by step",
    "walkthrough",
    "i'm confused",
    "im confused",
    "confused",
    "explain more",
    "more explanation",
    "can you explain that",
    "help me understand",
    "help",
}

# Keywords whose presence (as substrings) suggests the text is a real question.
_QUESTION_KEYWORDS = (
    "what is",
    "find",
    "if ",
    "how many",
    "probability",
    "ratio",
    "percent",
    "equation",
    "integer",
    "triangle",
    "circle",
    "mean",
    "median",
    "average",
    "remainder",
    "prime",
    "factor",
    "divisible",
    "area",
    "perimeter",
    "circumference",
)

# Phrases that identify a "what topic is this?" query on their own.
_TOPIC_QUERY_PHRASES = (
    "what topic is this question",
    "what topic is this",
    "what is the topic of this question",
    "what is the topic",
    "what type of question is this",
    "what type is this question",
    "what kind of question is this",
    "what kind is this question",
    "what area is this",
    "what concept is this",
    "what concept is this testing",
    "what skill is this testing",
    "what is this testing",
    "identify the topic",
    "identify the concept",
    "identify the type of question",
)

# Canned replies for topic queries, keyed by the specific topic label.
_TOPIC_QUERY_REPLIES = {
    "variability": (
        "- This is a statistics / data insight question about variability (spread).\n"
        "- The key idea is to compare how spread out each dataset is, not which one has the biggest average.\n"
        "- A good first move is to compare how far the outer values sit from the middle value in each set."
    ),
    "statistics": (
        "- This is a statistics / data insight question.\n"
        "- The key skill is spotting which statistical idea matters most, then comparing the answer choices using that idea."
    ),
    "algebra": (
        "- This is an algebra question.\n"
        "- The key skill is undoing the operations around the variable in a logical order."
    ),
    "ratio": (
        "- This is a ratio question.\n"
        "- The key skill is turning the ratio into consistent parts and then building the requested expression from those parts."
    ),
    "percent": (
        "- This is a percent question.\n"
        "- The key skill is identifying the correct base quantity before applying the percent relationship."
    ),
    "probability": (
        "- This is a probability question.\n"
        "- The key skill is deciding what counts as a successful outcome and then comparing favorable outcomes with total outcomes."
    ),
}


def _clean_text(text: Optional[str]) -> str:
    """Return *text* stripped of surrounding whitespace; ``None`` becomes ``""``."""
    return (text or "").strip()


def _safe_get_state(session_state: Optional[Dict[str, Any]]) -> Dict[str, Any]:
    """Return a shallow copy of *session_state*, or ``{}`` if it is not a dict."""
    if isinstance(session_state, dict):
        return dict(session_state)
    return {}


def _extract_question_candidates_from_history_item(item: Dict[str, Any]) -> List[str]:
    """Collect every non-blank string in a history item that may hold a question.

    Looks at the item's own text fields first, then ``item["meta"]``, then
    ``item["meta"]["session_state"]``. Values are returned stripped, in that
    lookup order. A non-dict *item* yields an empty list.
    """
    if not isinstance(item, dict):
        return []

    def _pick(source: Dict[str, Any], keys: Tuple[str, ...]) -> List[str]:
        # Keep only string values with visible content.
        values = (source.get(key) for key in keys)
        return [v.strip() for v in values if isinstance(v, str) and v.strip()]

    found = _pick(item, ("question_text", "raw_user_text", "content", "text", "message"))

    meta = item.get("meta")
    if isinstance(meta, dict):
        found.extend(_pick(meta, ("question_text", "recovered_question_text")))
        nested_state = meta.get("session_state")
        if isinstance(nested_state, dict):
            found.extend(_pick(nested_state, ("question_text",)))

    return found


def _is_followup_hint_only(text: str) -> bool:
    """Return True when the whole message is one of ``FOLLOWUP_ONLY_INPUTS``."""
    return (text or "").strip().lower() in FOLLOWUP_ONLY_INPUTS


def _strip_control_prefix(text: str) -> str:
    """Remove leading control phrases such as ``"solve:"`` or ``"hint:"``.

    The patterns are applied repeatedly until the text stops changing, so
    stacked prefixes ("question: solve: ...") are all removed.
    """
    current = (text or "").strip()
    if not current:
        return ""

    while True:
        before = current
        for pattern in CONTROL_PREFIX_PATTERNS:
            current = re.sub(pattern, "", current, flags=re.I).strip()
        if current == before:
            return current


def _sanitize_question_text(text: str) -> str:
    """Return the first line of *text* that survives control-prefix stripping.

    Lines that are empty after stripping, or that are pure follow-up requests,
    are skipped. If no line qualifies, the whole text with its control prefix
    removed is returned (which may be empty).
    """
    raw = (text or "").strip()
    if not raw:
        return ""

    for line in raw.splitlines():
        line = line.strip()
        if not line:
            continue
        candidate = _strip_control_prefix(line)
        if candidate and not _is_followup_hint_only(candidate):
            return candidate

    return _strip_control_prefix(raw)


def _looks_like_question_text(text: str) -> bool:
    """Heuristically decide whether *text* reads like a quantitative question.

    True when the text contains ``=`` or ``%``, a ``3 : 4`` style ratio, a
    letter followed by an arithmetic operator, or any of the question keywords
    (matched as plain substrings of the lower-cased text).
    """
    stripped = (text or "").strip()
    if not stripped:
        return False

    if "=" in stripped or "%" in stripped:
        return True
    if re.search(r"\b\d+\s*:\s*\d+\b", stripped):
        return True
    if re.search(r"[a-zA-Z]\s*[\+\-\*/=]", stripped):
        return True

    lowered = stripped.lower()
    return any(keyword in lowered for keyword in _QUESTION_KEYWORDS)


def _is_topic_query(text: str) -> bool:
    """Return True when the user is asking what topic/type a question is."""
    low = _clean_text(text).lower()
    if not low:
        return False

    if any(phrase in low for phrase in _TOPIC_QUERY_PHRASES):
        return True

    # Looser word-pair fallbacks for phrasings not listed verbatim.
    word_pairs = (
        ("topic", "this"),
        ("testing", "this"),
        ("type", "question"),
        ("kind", "question"),
    )
    return any(first in low and second in low for first, second in word_pairs)


def _specific_topic_from_question(question_text: str, fallback_topic: str, classified_topic: str) -> str:
    """Pick the most specific topic label the question wording supports.

    Keyword checks run in a fixed priority order; when none match, the
    lower-cased *fallback_topic* (else *classified_topic*, else ``"general"``)
    is returned.
    """
    q = _clean_text(question_text).lower()
    topic = (fallback_topic or classified_topic or "general").lower()

    if any(k in q for k in ("variability", "spread", "standard deviation")):
        return "variability"
    if any(k in q for k in ("mean", "average")):
        return "mean"
    if "median" in q:
        return "median"
    # Whole-word match: a plain substring test also fired on "oranges" and
    # "arrange", which hid the ratio/probability/percent checks below.
    if re.search(r"\branges?\b", q):
        return "range"
    if any(k in q for k in ("probability", "chance", "odds", "at random", "chosen at random")):
        return "probability"
    if (
        "ratio" in q
        or re.search(r"\b[a-z]\s*/\s*[a-z]\b", q)
        or re.search(r"\b\d+\s*/\s*\d+\b", q)
        or "proportion" in q
    ):
        return "ratio"
    if "percent" in q or "%" in q:
        return "percent"
    if topic == "data" and any(k in q for k in ("dataset", "table", "chart", "graph")):
        return "statistics"
    return topic


def _build_topic_query_reply(question_text: str, fallback_topic: str, classified_topic: str, category: str) -> str:
    """Build the bullet-style reply to a "what topic is this?" query.

    Topics with a canned explanation use it; anything else gets a one-line
    reply naming the topic, falling back to the lower-cased *category* and
    finally to "quantitative reasoning" when the topic is ``"general"``.
    """
    specific = _specific_topic_from_question(question_text, fallback_topic, classified_topic)

    canned = _TOPIC_QUERY_REPLIES.get(specific)
    if canned is not None:
        return canned

    if specific != "general":
        label = specific
    else:
        cat = (category or "").strip()
        label = cat.lower() if cat else "quantitative reasoning"
    return f"- This looks like a {label} question."
# Phrases meaning "tell me how to start".
_FIRST_STEP_PHRASES = (
    "what do i do first",
    "what should i do first",
    "first step",
    "where do i start",
    "how should i start",
)

# Phrases meaning "show me the method".
_METHOD_PHRASES = (
    "how do i solve",
    "how do i do this",
    "method",
    "walkthrough",
    "step by step",
    "explain",
)

_SPREAD_KEYWORDS = ("variability", "spread", "standard deviation")
_PROBABILITY_KEYWORDS = ("probability", "chance", "odds", "at random", "chosen at random")

# Matches a letter ratio set equal to a numeric ratio, e.g. "a/b = 3/4".
_RATIO_EQUATION_RE = re.compile(r"\b[a-z]\s*/\s*[a-z]\s*=\s*\d+\s*/\s*\d+")

# Steps matching any of these would reveal or announce the final answer.
_BANNED_STEP_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"\bthe answer is\b",
        # No trailing \b: there is no word boundary between ":" and a space,
        # so r"\banswer:\b" never matched the usual "answer: 5".
        r"\banswer\s*:",
        r"\bthat gives\b",
        r"\bthis gives\b",
        r"\btherefore\b",
        r"\bthus\b",
        r"\bresult is\b",
        r"\bfinal answer\b",
    )
)


def _contains_any(text: str, phrases) -> bool:
    """Return True if any of *phrases* occurs as a substring of *text*."""
    return any(phrase in text for phrase in phrases)


def _classify_input_type(raw_user_text: str) -> str:
    """Classify a user message into a coarse input type.

    Returns one of ``"empty"``, ``"topic_query"``, ``"hint"``, ``"next_hint"``,
    ``"confusion"``, ``"solve"``, ``"question"`` or ``"other"``. Checks run in
    that priority order and most of them are plain substring tests on the
    lower-cased message.
    """
    text = _clean_text(raw_user_text).lower()
    if not text:
        return "empty"

    if _is_topic_query(raw_user_text):
        return "topic_query"

    if _contains_any(text, _FIRST_STEP_PHRASES):
        return "hint"
    if text in {"hint", "a hint", "give me a hint", "can i have a hint"} or text.startswith("hint:"):
        return "hint"

    next_hint_phrases = (
        "next hint",
        "another hint",
        "more hint",
        "more hints",
        "second hint",
        "third hint",
        "second next hint",
        "what do i do next",
        "what should i do next",
        "next step",
        "continue",
        "go on",
    )
    if _contains_any(text, next_hint_phrases) or text.startswith("next hint:"):
        return "next_hint"

    confusion_phrases = (
        "walkthrough",
        "step by step",
        "i'm confused",
        "im confused",
        "confused",
        "explain more",
        "help me understand",
        "method",
        "explain",
        "how do i solve",
        "how do i do this",
    )
    if _contains_any(text, confusion_phrases):
        return "confusion"

    if text.startswith(("solve:", "solve ")):
        return "solve"

    if _looks_like_question_text(_strip_control_prefix(raw_user_text)):
        return "question"
    return "other"


def _is_followup_input(input_type: str) -> bool:
    """Return True for input types that build on an earlier question."""
    return input_type in {"hint", "next_hint", "confusion"}


def _coerce_hint_stage(value: Any) -> int:
    """Convert a stored hint stage to ``int``; unusable values count as 0."""
    try:
        return int(value or 0)
    except (TypeError, ValueError, OverflowError):
        # Best effort: history entries are free-form, so skip bad values
        # rather than failing the whole turn.
        return 0


def _history_hint_stage(chat_history: Optional[List[Dict[str, Any]]]) -> int:
    """Return the highest hint stage recorded anywhere in the history, capped at 3.

    Each dict item is inspected at three levels: the item itself, its
    ``meta`` dict, and ``meta["session_state"]``. Non-dict items are ignored.
    """
    best = 0
    for item in chat_history or []:
        if not isinstance(item, dict):
            continue
        best = max(best, _coerce_hint_stage(item.get("hint_stage", 0)))

        meta = item.get("meta")
        if not isinstance(meta, dict):
            continue
        best = max(best, _coerce_hint_stage(meta.get("hint_stage", 0)))

        nested_state = meta.get("session_state")
        if isinstance(nested_state, dict):
            best = max(best, _coerce_hint_stage(nested_state.get("hint_stage", 0)))
    return min(best, 3)


def _recover_question_text(
    raw_user_text: str,
    question_text: Optional[str],
    chat_history: Optional[List[Dict[str, Any]]],
    input_type: str,
) -> str:
    """Find the question this turn refers to.

    Preference order: the explicit *question_text*; the user's own message if
    it looks like a question; for follow-up inputs only, the most recent
    question-like text in *chat_history*. Non-follow-up inputs fall back to
    the sanitized user message; follow-ups with no recoverable question
    return ``""``.
    """
    explicit = _sanitize_question_text(question_text or "")
    if explicit:
        return explicit

    direct_candidate = _sanitize_question_text(raw_user_text)
    if direct_candidate and _looks_like_question_text(direct_candidate):
        return direct_candidate
    if not _is_followup_input(input_type):
        return direct_candidate

    # Newest history entries first.
    for item in reversed(chat_history or []):
        for candidate in _extract_question_candidates_from_history_item(item):
            recovered = _sanitize_question_text(candidate)
            if (
                recovered
                and not _is_followup_hint_only(recovered)
                and _looks_like_question_text(recovered)
            ):
                return recovered
    return ""


def _choose_effective_question_text(
    raw_user_text: str,
    question_text: Optional[str],
    input_type: str,
    state: Dict[str, Any],
    chat_history: Optional[List[Dict[str, Any]]],
) -> Tuple[str, bool]:
    """Choose the question text to work on for this turn.

    Returns ``(question, built_on_previous_turn)``. The flag is True only when
    a follow-up input had to reuse the question stored in *state* or recover
    one from *chat_history*.
    """
    explicit_question = _sanitize_question_text(question_text or "")
    stored_question = _sanitize_question_text(state.get("question_text", ""))

    if not _is_followup_input(input_type):
        if explicit_question:
            return explicit_question, False
        return _sanitize_question_text(raw_user_text), False

    if explicit_question and _looks_like_question_text(explicit_question):
        return explicit_question, False

    direct_candidate = _sanitize_question_text(raw_user_text)
    if direct_candidate and _looks_like_question_text(direct_candidate):
        return direct_candidate, False

    if stored_question and _looks_like_question_text(stored_question):
        return stored_question, True

    recovered = _recover_question_text(raw_user_text, question_text, chat_history, input_type)
    return recovered, True


def _compute_hint_stage(input_type: str, prior_hint_stage: int, fallback_history_stage: int = 0) -> int:
    """Return the hint stage to use for this turn.

    A new question or solve request resets to 0, ``"hint"`` keeps the current
    stage (at least 1), ``"next_hint"`` advances one stage from at least 1,
    and ``"confusion"`` restarts at 1. Results are capped at 3.
    """
    base = max(int(prior_hint_stage or 0), int(fallback_history_stage or 0))

    if input_type in {"solve", "question"}:
        return 0
    if input_type == "hint":
        return min(max(base, 1), 3)
    if input_type == "next_hint":
        return min(max(base, 1) + 1, 3)
    if input_type == "confusion":
        return 1
    return min(base, 3)


def _update_session_state(
    state: Dict[str, Any],
    *,
    question_text: str,
    question_id: Optional[str],
    hint_stage: int,
    user_last_input_type: str,
    built_on_previous_turn: bool,
    help_mode: str,
    intent: str,
    topic: Optional[str],
    category: Optional[str],
) -> Dict[str, Any]:
    """Write this turn's values into *state* (mutated in place) and return it.

    ``question_text`` and ``question_id`` are only overwritten when the new
    value is truthy, so an empty turn keeps the previously stored question.
    """
    if question_text:
        state["question_text"] = question_text
    if question_id:
        state["question_id"] = question_id

    state["hint_stage"] = int(hint_stage or 0)
    state["user_last_input_type"] = user_last_input_type
    state["built_on_previous_turn"] = bool(built_on_previous_turn)
    state["help_mode"] = help_mode
    state["intent"] = intent
    state["topic"] = topic
    state["category"] = category
    return state


def _normalize_classified_topic(topic: Optional[str], category: Optional[str], question_text: str) -> str:
    """Return a usable topic label for the question.

    A specific classifier *topic* is returned as-is (lower-cased). Generic or
    missing topics are refined from keywords in the question, then from
    algebra-like notation, then from the normalized *category*.
    """
    t = (topic or "").strip().lower()
    q = (question_text or "").lower()
    c = normalize_category(category)

    if t not in {"general_quant", "general", "unknown", ""}:
        return t

    if "%" in q or "percent" in q:
        return "percent"
    if "ratio" in q or re.search(r"\b\d+\s*:\s*\d+\b", q):
        return "ratio"
    if _contains_any(q, ("probability", "chosen at random", "odds", "chance")):
        return "probability"
    if _contains_any(q, ("divisible", "remainder", "prime", "factor")):
        return "number_theory"
    if _contains_any(q, ("circle", "triangle", "perimeter", "area", "circumference", "rectangle")):
        return "geometry"
    if _contains_any(q, ("mean", "median", "average", "variability", "standard deviation")):
        return "statistics" if c == "Quantitative" else "data"

    # NOTE(review): r"\b[xyzabn]\b" also matches the English article "a", so
    # most sentences count as algebra here — confirm whether that is intended.
    has_algebra_form = (
        "=" in q
        or bool(re.search(r"\b[xyzabn]\b", q))
        or bool(re.search(r"\d+[a-z]\b", q))
        or bool(re.search(r"\b[a-z]\s*[\+\-\*/=]", q))
    )
    if has_algebra_form:
        return "algebra"

    if c == "DataInsight":
        return "data"
    if c == "Verbal":
        return "verbal"
    if c == "Quantitative":
        return "quant"
    return "general"


def _strip_bullet_prefix(text: str) -> str:
    """Strip surrounding whitespace and one leading ``-`` or ``•`` bullet."""
    return re.sub(r"^\s*[-•]\s*", "", (text or "").strip())


def _safe_steps(steps: List[str]) -> List[str]:
    """Return *steps* with bullets removed and answer-revealing steps dropped.

    Empty steps are skipped and duplicates (compared case-insensitively) are
    removed, keeping the first occurrence and the original order.
    """
    kept: List[str] = []
    seen = set()
    for step in steps:
        body = _strip_bullet_prefix(step)
        if not body:
            continue
        lowered = body.lower()
        if any(pattern.search(lowered) for pattern in _BANNED_STEP_PATTERNS):
            continue
        if lowered in seen:
            continue
        seen.add(lowered)
        kept.append(body)
    return kept


def _safe_meta_list(items: Any) -> List[str]:
    """Coerce *items* into a list of non-blank, stripped strings.

    Lists and tuples are converted element by element, a string becomes a
    one-element list, and anything else (or a falsy value) gives ``[]``.
    """
    if not items:
        return []
    if isinstance(items, (list, tuple)):
        return [str(x).strip() for x in items if str(x).strip()]
    if isinstance(items, str):
        text = items.strip()
        return [text] if text else []
    return []


def _safe_meta_text(value: Any) -> Optional[str]:
    """Return ``str(value)`` stripped, or ``None`` when missing or blank."""
    if value is None:
        return None
    return str(value).strip() or None


def _extract_explainer_scaffold(explainer_result: Any) -> Dict[str, Any]:
    """Flatten ``explainer_result.scaffold`` into a plain dict.

    Returns ``{}`` when there is no scaffold. Text fields become stripped
    strings or ``None``, list fields become lists of strings, and
    ``answer_hidden`` defaults to True when the attribute is absent.
    """
    scaffold = getattr(explainer_result, "scaffold", None)
    if scaffold is None:
        return {}

    def text(name: str) -> Optional[str]:
        return _safe_meta_text(getattr(scaffold, name, None))

    def items(name: str) -> List[str]:
        return _safe_meta_list(getattr(scaffold, name, []))

    return {
        "concept": text("concept"),
        "ask": text("ask"),
        "givens": items("givens"),
        "target": text("target"),
        "setup_actions": items("setup_actions"),
        "intermediate_steps": items("intermediate_steps"),
        "first_move": text("first_move"),
        "next_hint": text("next_hint"),
        "common_traps": items("common_traps"),
        "variables_to_define": items("variables_to_define"),
        "equations_to_form": items("equations_to_form"),
        "answer_hidden": bool(getattr(scaffold, "answer_hidden", True)),
        "solution_path_type": text("solution_path_type"),
        "key_operations": items("key_operations"),
        "hint_ladder": items("hint_ladder"),
    }


def _get_result_steps(result: Optional[SolverResult]) -> List[str]:
    """Return the sanitized steps from the first non-empty step list on *result*.

    Sources are tried in order: ``result.display_steps``, ``result.steps``,
    ``result.meta["display_steps"]``, ``result.meta["steps"]``. The first
    non-empty list wins, even if sanitizing then leaves it empty.
    """
    if result is None:
        return []

    meta = getattr(result, "meta", None)
    if not isinstance(meta, dict):
        # Tolerate a missing or malformed meta instead of failing on .get().
        meta = {}

    sources = (
        getattr(result, "display_steps", None),
        getattr(result, "steps", None),
        meta.get("display_steps"),
        meta.get("steps"),
    )
    for candidate in sources:
        if isinstance(candidate, list) and candidate:
            return _safe_steps(candidate)
    return []


def _apply_safe_step_sanitization(result: Optional[SolverResult]) -> None:
    """Overwrite every step list on *result* with the sanitized steps.

    Mutates *result* in place: ``steps``, ``display_steps``, ``meta["steps"]``
    and ``meta["display_steps"]`` each receive their own copy of the list.
    """
    if result is None:
        return

    safe_steps = _get_result_steps(result)
    result.steps = list(safe_steps)
    setattr(result, "display_steps", list(safe_steps))
    result.meta = result.meta or {}
    result.meta["steps"] = list(safe_steps)
    result.meta["display_steps"] = list(safe_steps)


def _solver_has_useful_steps(result: Optional[SolverResult]) -> bool:
    """Return True when *result* has at least one step left after sanitizing."""
    return bool(result is not None and _get_result_steps(result))


def _parse_numeric_option_set(option: str) -> Optional[List[float]]:
    """Parse a comma-separated answer option such as ``"2, 4, 6"`` into floats.

    Returns ``None`` when the option is empty, holds a non-numeric part, or
    has fewer than two numbers.
    """
    raw = _clean_text(option)
    if not raw:
        return None
    try:
        parts = [float(piece.strip()) for piece in raw.split(",") if piece.strip()]
    except ValueError:
        return None
    return parts if len(parts) >= 2 else None


def _looks_like_simple_linear_equation(question_text: str) -> bool:
    """Return True for text like "If 3x + 2 = 11, what is x?"."""
    q = _clean_text(question_text).lower()
    if "=" not in q:
        return False
    asks_for_variable = re.search(r"\bwhat is\s+[a-z]\b", q)
    has_variable_term = re.search(r"\b\d+[a-z]\b|\b[a-z]\b", q)
    return bool(asks_for_variable and has_variable_term)


def _question_specific_ratio_reply(question_text: str) -> str:
    """Return guidance for a ratio question, more detailed for "a/b = 3/4, what is (...)" forms."""
    low = _clean_text(question_text).lower()
    if _RATIO_EQUATION_RE.search(low) and re.search(r"what is\s*\(", low):
        return (
            "- Treat the ratio as matching parts: if a/b = 3/4, you can set a = 3k and b = 4k.\n"
            "- Substitute those part-values into the expression the question asks for instead of solving for specific numbers.\n"
            "- After substitution, simplify the expression by cancelling the common factor k."
        )
    return (
        "- Rewrite the ratio using matching parts, such as 3k and 4k, before touching the target expression.\n"
        "- Build the requested expression from those parts, then simplify only at the end."
    )


def _question_specific_variability_reply(options_text: Optional[List[str]]) -> str:
    """Return guidance for a variability question.

    The three-number wording is used only when at least one option parses as
    numbers and every parseable option has exactly three values.
    """
    parsed = (_parse_numeric_option_set(opt) for opt in (options_text or []))
    valid = [values for values in parsed if values]
    if valid and all(len(values) == 3 for values in valid):
        return (
            "- This is asking about variability, so compare spread rather than average.\n"
            "- For each three-number set, use the middle value as the centre and compare how far the outer numbers sit from it.\n"
            "- The dataset whose values stretch furthest away from the centre is the one with the greatest variability."
        )
    return (
        "- This is asking about variability, so focus on spread rather than the average.\n"
        "- Compare how tightly clustered or widely spaced the values are in each answer choice.\n"
        "- The choice with the widest spread is the strongest candidate."
    )


def _question_specific_percent_reply(question_text: str, user_text: str = "") -> str:
    """Return guidance for a percent question, tuned to what the user asked for.

    *user_text* is scanned for "first step" and "method" phrasing to choose
    between a short starter, a method outline, and the default guidance.
    """
    clean = _clean_text(question_text)
    low = clean.lower()
    user_low = _clean_text(user_text).lower()
    nums = re.findall(r"-?\d+(?:\.\d+)?", clean)
    wants_first = _contains_any(user_low, _FIRST_STEP_PHRASES)
    wants_method = _contains_any(user_low, _METHOD_PHRASES)

    if "increased by" in low and "decreased by" in low:
        if wants_first:
            return (
                "- First turn each percent change into a multiplier instead of combining the percentages directly.\n"
                "- Apply the increase multiplier to the original amount, then apply the decrease multiplier to the updated amount."
            )
        return (
            "- For back-to-back percent changes, turn the changes into multipliers instead of trying to combine the percentages directly.\n"
            "- Apply the increase multiplier first, then the decrease multiplier to that new amount.\n"
            "- Compare the final multiplier with 1 to decide whether the result is above or below the original."
        )

    if "out of" in low and len(nums) >= 2:
        # The first two numbers in the text are taken as part and whole.
        part, whole = nums[0], nums[1]
        if wants_first:
            return (
                f"- First write the relationship as the fraction {part}/{whole}.\n"
                f"- Use {whole} as the total and {part} as the part before doing any percent conversion."
            )
        if wants_method:
            return (
                f"- This is a part-over-whole percent question, so start by writing {part}/{whole}.\n"
                f"- Use {whole} as the base because it is the total, and {part} as the part that matches the condition.\n"
                "- Then convert that fraction to a percent by simplifying or turning it into a decimal and multiplying by 100."
            )
        return (
            f"- This is a part-over-whole percent question: start by writing the fraction as {part}/{whole}.\n"
            f"- Use {whole} as the base because it is the total, and {part} as the part that chose the option.\n"
            "- Then convert that fraction to a percent by simplifying or turning it into a decimal and multiplying by 100."
        )

    # "of" is matched as a whole word; as a substring it also fired on words
    # such as "profit" and "often".
    if re.search(r"\bof\b", low) or "what percent" in low or "%" in low:
        if wants_first:
            return (
                "- First ask 'percent of what?' so you identify the correct base quantity.\n"
                "- Then put the part over the whole before converting anything to a percent."
            )
        return (
            "- Ask 'percent of what?' first so you identify the correct base quantity.\n"
            "- Put the part over the whole before doing any percent conversion.\n"
            "- Only multiply by 100 after the fraction is set up correctly."
        )

    return (
        "- Identify the base quantity first, because percent relationships only make sense relative to a base.\n"
        "- Translate the wording into either a multiplier or a percent equation before simplifying."
    )


def _question_specific_probability_reply(
    question_text: str,
    user_text: str = "",
    options_text: Optional[List[str]] = None,
) -> str:
    """Return guidance for a probability question.

    Branches are tried in order: single-draw/container wording, "at least"
    wording, combined-event wording, then a generic favorable-over-total
    reply. *user_text* selects the "first step" or "method" variant where one
    exists, and an extra line is added when answer options are present.
    """
    low = _clean_text(question_text).lower()
    user_low = _clean_text(user_text).lower()
    has_options = bool(options_text)
    wants_first = _contains_any(user_low, _FIRST_STEP_PHRASES)
    wants_method = _contains_any(user_low, _METHOD_PHRASES)

    single_draw_markers = (
        "chosen at random",
        "select one",
        "choose one",
        "one ball",
        "one card",
        "one marble",
        "one object",
        "selected at random",
        "picked at random",
        "one ball is chosen",
        "one card is drawn",
    )
    container_markers = (
        "box contains",
        "bag contains",
        "urn contains",
        "deck",
        "balls",
        "cards",
        "marbles",
        "dice",
        "coin",
    )

    if _contains_any(low, single_draw_markers) or (
        "probability" in low and _contains_any(low, container_markers)
    ):
        if wants_first:
            return (
                "- First decide what counts as a successful outcome.\n"
                "- Then count the total number of possible outcomes in the box, bag, or sample space."
            )
        if wants_method:
            lines = [
                "- For a one-draw probability question, use favorable outcomes over total outcomes.",
                "- Count how many outcomes match the condition, then count the total number of possible outcomes.",
                "- Build the fraction favorable/total before matching it to an answer choice.",
            ]
            if has_options:
                lines.append("- Once the fraction is set up, compare it directly with the options.")
            return "\n".join(lines)
        lines = [
            "- Start by deciding what counts as a successful outcome in this question.",
            "- Then count the total number of possible outcomes in the container or sample space.",
            "- Set up the probability as favorable outcomes over total outcomes before comparing the answer choices.",
        ]
        if has_options:
            lines.append("- Use that fraction to match the answer choices instead of doing extra work.")
        return "\n".join(lines)

    if "at least" in low:
        if wants_first:
            return (
                "- First check whether the complement is easier than counting the requested cases directly.\n"
                "- For 'at least' problems, the opposite event is often simpler to compute first."
            )
        return (
            "- Start by deciding whether the complement is easier than counting the requested cases directly.\n"
            "- For an 'at least' question, it is often simpler to find the probability of the opposite event first.\n"
            "- Then subtract that result from 1 at the end."
        )

    # Connectives are matched as whole words: as substrings, "and" is found
    # inside "random", which made this branch fire for any text with "random".
    mentions_sequence = bool(re.search(r"\b(?:and|both|then|after|afterwards?)\b", low))
    if mentions_sequence and _contains_any(low, ("probability", "chosen", "random")):
        if wants_first:
            return (
                "- First decide whether the events happen together or separately.\n"
                "- Then work out whether you need multiplication, addition, or the complement rule."
            )
        return (
            "- First identify whether the events happen together or separately.\n"
            "- Then decide whether you should multiply probabilities, add them, or use the complement.\n"
            "- Keep track of whether the total outcomes change after each step."
        )

    if wants_first:
        return (
            "- First identify the favorable outcomes.\n"
            "- Then identify the total possible outcomes before simplifying anything."
        )
    return (
        "- Start by identifying the favorable outcomes and the total possible outcomes.\n"
        "- Then build the probability as favorable over total before simplifying or matching an answer choice."
    )


def _question_specific_algebra_reply(question_text: str, user_text: str = "") -> str:
    """Return guidance for an algebra or ratio-equation question.

    Simple linear equations get "undo the operations" guidance, "a/b = 3/4"
    forms delegate to the ratio reply, and expressions in parentheses get
    substitution guidance. Anything else gets a generic equation-first reply.
    """
    q = _clean_text(question_text)
    low = q.lower()
    wants_first = _contains_any(_clean_text(user_text).lower(), _FIRST_STEP_PHRASES)

    if _looks_like_simple_linear_equation(q):
        if wants_first:
            return (
                "- First look at the variable side and ask which operation is furthest away from the variable.\n"
                "- Undo that outside addition or subtraction on both sides before touching the coefficient."
            )
        return (
            "- Treat this as a linear equation and undo the operations around the variable in reverse order.\n"
            "- First remove the constant attached to the variable side by doing the opposite operation on both sides.\n"
            "- Then undo the multiplication or division on the variable to isolate it."
        )

    if _RATIO_EQUATION_RE.search(low):
        return _question_specific_ratio_reply(q)

    targets_expression = (
        "what is" in low
        and "(" in low
        and ")" in low
        and _contains_any(low, ("a+b", "x+y", "a-b", "x-y"))
    )
    if targets_expression:
        return (
            "- Start by rewriting one variable in terms of the other using the relationship you were given.\n"
            "- Then substitute into the exact expression in parentheses, rather than trying to solve for actual numbers.\n"
            "- Simplify only after the whole target expression has been rewritten in one variable or in matching parts."
        )

    if wants_first:
        return (
            "- First turn the wording into one clean equation.\n"
            "- Then decide which operation around the variable should be undone first."
        )
    return (
        "- Turn the wording into one clean equation first.\n"
        "- Then undo the operations around the variable in reverse order until the variable stands alone."
    )


def _question_specific_hint_ladder(
    *,
    question_text: str,
    options_text: Optional[List[str]],
    classified_topic: str,
) -> List[str]:
    """Return a three-step hint ladder for the question, or ``[]`` if none fits.

    ``options_text`` is accepted for interface compatibility and is not used.
    """
    q = _clean_text(question_text)
    low = q.lower()
    topic = (classified_topic or "general").lower()

    if _looks_like_simple_linear_equation(q) or topic == "algebra":
        return [
            "Look at the variable side and ask which operation is furthest away from the variable.",
            "Undo the addition or subtraction first by doing the opposite on both sides.",
            "Once the variable term is alone, undo the multiplication or division on the variable.",
        ]

    if topic == "probability" or _contains_any(low, _PROBABILITY_KEYWORDS):
        return [
            "What counts as a successful outcome here?",
            "How many total possible outcomes are there?",
            "Set up the probability as favorable over total before comparing answer choices.",
        ]

    if topic == "percent" or "%" in low or "percent" in low:
        if "out of" in low:
            return [
                "Which number is the part and which number is the total?",
                "Write the relationship as part over whole before converting anything.",
                "Once the fraction is correct, convert it to a percent.",
            ]
        return [
            "Ask 'percent of what?' first.",
            "Put the part over the base quantity.",
            "Only multiply by 100 after the fraction or equation is set up correctly.",
        ]

    if _contains_any(low, _SPREAD_KEYWORDS):
        return [
            "This is about spread, not average.",
            "Compare how far the outer values sit from the middle value in each set.",
            "The set with the widest spread has the greatest variability.",
        ]

    if _RATIO_EQUATION_RE.search(low):
        return [
            "Rewrite the ratio using matching parts such as 3k and 4k.",
            "Substitute those matching parts into the expression the question asks for.",
            "Simplify after substitution by cancelling the common factor.",
        ]

    return []


def _build_question_specific_reply(
    *,
    question_text: str,
    options_text: Optional[List[str]],
    classified_topic: str,
    help_mode: str,
    input_type: str,
    user_text: str,
) -> str:
    """Dispatch to the topic-specific reply builder for this question.

    Returns ``""`` when there is no question text, or when no topic matches
    and the user did not explicitly ask for help. ``help_mode`` is accepted
    for interface compatibility and is not used.
    """
    q = _clean_text(question_text)
    if not q:
        return ""

    low = q.lower()
    topic = (classified_topic or "general").lower()
    user_low = _clean_text(user_text).lower()

    if _contains_any(low, _SPREAD_KEYWORDS):
        return _question_specific_variability_reply(options_text)

    if topic == "probability" or _contains_any(low, _PROBABILITY_KEYWORDS):
        return _question_specific_probability_reply(q, user_low, options_text)

    if topic in {"ratio", "algebra"}:
        return _question_specific_algebra_reply(q, user_low)

    if topic == "percent" or "%" in low or "percent" in low:
        return _question_specific_percent_reply(q, user_low)

    if topic == "statistics" and _contains_any(low, ("dataset", "table", "chart", "graph")):
        return (
            "- Read the question stem first, then decide which statistic matters before comparing answer choices.\n"
            "- Use the structure of the choices to compare them efficiently instead of computing unnecessary extra values."
        )

    explicit_help_ask = _is_followup_input(input_type) or _contains_any(
        user_low, ("how do i solve", "how do i do this") + _FIRST_STEP_PHRASES
    )
    if explicit_help_ask:
        return "- Start by identifying the main relationship in the question, then use that relationship to set up the first step."
    return ""


def _answer_path_from_steps(steps: List[str], verbosity: float) -> str:
    """Render sanitized steps as a bullet list sized by *verbosity*.

    Shows two steps below 0.35, three below 0.8, and every step otherwise.
    Returns ``""`` when no steps survive sanitizing.
    """
    safe_steps = _safe_steps(steps)
    if not safe_steps:
        return ""

    if verbosity < 0.35:
        shown_steps = safe_steps[:2]
    elif verbosity < 0.8:
        shown_steps = safe_steps[:3]
    else:
        shown_steps = safe_steps
    return "\n".join(f"- {step}" for step in shown_steps)


def _build_fallback_reply(
    *,
    question_id: Optional[str],
    question_text: str,
    options_text: Optional[List[str]],
    topic: Optional[str],
    category: Optional[str],
    help_mode: str,
    hint_stage: int,
    verbosity: float,
) -> Tuple[str, Dict[str, Any]]:
    """Ask ``question_fallback_router`` for a reply and format it as bullets.

    Returns ``(reply_text, pack)``. A generic one-line reply is used when the
    payload has no lines, and ``pack`` defaults to ``{}``.
    """
    payload = question_fallback_router.build_response(
        question_id=question_id,
        question_text=question_text,
        options_text=options_text,
        topic=topic,
        category=category,
        help_mode=help_mode,
        hint_stage=hint_stage,
        verbosity=verbosity,
    )
    lines = payload.get("lines") or ["Start by identifying the main relationship in the problem."]
    pack = payload.get("pack") or {}
    reply = "\n".join(f"- {line}" for line in lines if str(line).strip())
    return reply, pack


def _is_direct_solve_request(text: str, intent: str) -> bool:
    """Return True when the user wants the answer rather than guidance.

    An ``"answer"`` intent always qualifies. Otherwise the text must match a
    ``DIRECT_SOLVE_PATTERNS`` regex and contain none of the help-seeking words
    (checked as substrings).
    """
    if intent == "answer":
        return True

    normalized = re.sub(r"\s+", " ", (text or "").strip().lower())
    if not any(re.search(pattern, normalized) for pattern in DIRECT_SOLVE_PATTERNS):
        return False

    help_words = ("how", "explain", "why", "method", "hint", "define", "definition", "step")
    return not _contains_any(normalized, help_words)


def _is_help_first_mode(help_mode: str) -> bool:
    """Return True for help modes that guide rather than answer."""
    return help_mode in {"hint", "walkthrough", "explain", "instruction", "step_by_step"}


def _should_try_solver(is_quant: bool, help_mode: str, solver_input: str) -> bool:
    """Return True when the solver should run: quant input, non-empty text, solver-backed mode."""
    if not (is_quant and solver_input):
        return False
    return help_mode in {"answer", "walkthrough", "instruction", "hint", "step_by_step"}


def _support_pack_is_strong(fallback_pack: Dict[str, Any]) -> bool:
    """Return True when the fallback pack is trustworthy enough to prefer.

    An empty pack is never strong. A ``generated_question_specific`` pack
    needs both a topic and a non-empty hint ladder.
    """
    if not fallback_pack:
        return False

    support_source = str(fallback_pack.get("support_source", "")).strip().lower()
    support_match = fallback_pack.get("support_match")
    if not isinstance(support_match, dict):
        # Tolerate a missing or malformed match record.
        support_match = {}
    match_mode = str(support_match.get("mode", "")).strip().lower()

    if support_source in {"question_bank", "question_bank_refined"}:
        return True
    if match_mode in {"question_id", "signature_exact", "text_exact", "signature_unordered", "fuzzy"}:
        return True
    if support_source == "generated_question_specific":
        ladder = _safe_meta_list(fallback_pack.get("hint_ladder", []))
        return bool(fallback_pack.get("topic") and ladder)

    # NOTE(review): the pack is non-empty here, so this is always True —
    # confirm that any other non-empty pack should count as strong.
    return bool(fallback_pack)


def _should_prefer_question_support(help_mode: str, fallback_pack: Dict[str, Any]) -> bool:
    """Return True when a guidance-style help mode has a strong fallback pack."""
    if not fallback_pack:
        return False
    guidance_modes = {"hint", "walkthrough", "instruction", "step_by_step", "explain", "method"}
    if help_mode not in guidance_modes:
        return False
    return _support_pack_is_strong(fallback_pack)


def _minimal_generic_reply(category: Optional[str]) -> str:
    """Return a last-resort reply chosen by the normalized *category*."""
    c = normalize_category(category)
    if c == "Verbal":
        return "I can help analyse the wording or logic, but I need the full question text to guide you properly."
    if c == "DataInsight":
        return "I can help reason through the data, but I need the full question or chart details to guide you properly."
    return "Start by identifying the main relationship in the problem."
class ConversationEngine:
    """Produce one tutoring reply per user turn.

    ``generate_response`` classifies the turn, resolves intent and help mode,
    optionally runs the solver and explainer, gathers question-support
    content, and then picks exactly one reply source. Session bookkeeping is
    returned inside ``result.meta["session_state"]``.
    """

    # Phrases in the user's message that ask how to start or continue.
    _GUIDANCE_PHRASES = (
        "how do i solve",
        "what do i do first",
        "what should i do first",
        "what do i do next",
        "what should i do next",
        "how should i start",
    )

    # Solver topics accepted in addition to the classified topic itself.
    _RELATED_SOLVER_TOPICS = {
        "algebra": ("ratio",),
        "ratio": ("algebra",),
        "percent": ("ratio", "algebra"),
    }

    def __init__(
        self,
        retriever: Optional[RetrievalEngine] = None,
        generator: Optional[GeneratorEngine] = None,
        **kwargs,
    ) -> None:
        """Store the optional collaborators; extra keyword arguments are ignored."""
        self.retriever = retriever
        self.generator = generator

    @staticmethod
    def _prior_hint_stage(state: Dict[str, Any]) -> int:
        """Read ``hint_stage`` from session state, treating bad values as 0."""
        try:
            return int(state.get("hint_stage", 0) or 0)
        except (TypeError, ValueError):
            # Session state comes from the caller; a malformed value should
            # restart the hint ladder rather than abort the whole turn.
            return 0

    @staticmethod
    def _ladder_rungs(
        ladder: Optional[List[Any]],
        hint_stage: int,
        verbosity: float,
        *,
        allow_lookahead: bool = True,
    ) -> List[Any]:
        """Pick the ladder entry for ``hint_stage`` (clamped to the ladder).

        With ``allow_lookahead`` and ``verbosity >= 0.62`` the following entry
        is included too, when there is one. An empty ladder yields ``[]``.
        """
        if not ladder:
            return []
        idx = min(max(hint_stage - 1, 0), len(ladder) - 1)
        rungs = [ladder[idx]]
        if allow_lookahead and verbosity >= 0.62 and idx + 1 < len(ladder):
            rungs.append(ladder[idx + 1])
        return rungs

    @staticmethod
    def _note_question_support(meta: Dict[str, Any], fallback_pack: Dict[str, Any], used: bool) -> None:
        """Record in ``meta`` whether, and from which pack, question support was used."""
        meta["question_support_used"] = used
        meta["question_support_source"] = fallback_pack.get("support_source") if fallback_pack else None
        meta["question_support_topic"] = fallback_pack.get("topic") if fallback_pack else None

    @staticmethod
    def _withhold_answer(result: SolverResult) -> None:
        """Clear every answer field on ``result`` so no answer is exposed."""
        result.answer_letter = None
        result.answer_value = None
        result.internal_answer = None
        result.meta["internal_answer"] = None

    def _select_hint_lines(
        self,
        *,
        hint_stage: int,
        verbosity: float,
        solver_input: str,
        options_text: Optional[List[str]],
        question_topic: Any,
        inferred_category: Any,
        fallback_pack: Dict[str, Any],
        fallback_reply_core: str,
        explainer_scaffold: Dict[str, Any],
    ) -> List[Any]:
        """Choose the hint text for this stage from the first source that has any.

        Sources are tried in order: the support pack's hint ladder, the
        question-specific ladder, the explainer scaffold, the lines of the
        fallback reply, and finally the minimal generic reply.
        """
        hint_lines: List[Any] = []
        if fallback_pack:
            hint_lines = self._ladder_rungs(
                _safe_meta_list(fallback_pack.get("hint_ladder", [])),
                hint_stage,
                verbosity,
            )

        if not hint_lines:
            custom_ladder = _question_specific_hint_ladder(
                question_text=solver_input,
                options_text=options_text,
                classified_topic=question_topic,
            )
            hint_lines = self._ladder_rungs(custom_ladder, hint_stage, verbosity)

        if not hint_lines and explainer_scaffold:
            ladder = _safe_meta_list(explainer_scaffold.get("hint_ladder", []))
            first_move = _safe_meta_text(explainer_scaffold.get("first_move"))
            next_hint_text = _safe_meta_text(explainer_scaffold.get("next_hint"))
            if hint_stage <= 1 and first_move:
                hint_lines = [first_move]
            elif ladder:
                hint_lines = self._ladder_rungs(ladder, hint_stage, verbosity, allow_lookahead=False)
            elif next_hint_text:
                hint_lines = [next_hint_text]

        if not hint_lines and fallback_reply_core:
            split_lines = [
                line.strip("- ").strip()
                for line in fallback_reply_core.splitlines()
                if line.strip()
            ]
            hint_lines = self._ladder_rungs(split_lines, hint_stage, verbosity, allow_lookahead=False)

        if not hint_lines:
            hint_lines = [_minimal_generic_reply(inferred_category)]
        return hint_lines

    def generate_response(
        self,
        raw_user_text: Optional[str] = None,
        tone: float = 0.5,
        verbosity: float = 0.5,
        transparency: float = 0.5,
        intent: Optional[str] = None,
        help_mode: Optional[str] = None,
        retrieval_context: Optional[List[RetrievedChunk]] = None,
        chat_history: Optional[List[Dict[str, Any]]] = None,
        question_text: Optional[str] = None,
        options_text: Optional[List[str]] = None,
        question_id: Optional[str] = None,
        session_state: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> SolverResult:
        """Build the reply for one user turn.

        Args:
            raw_user_text: The user's message for this turn.
            tone: Forwarded to the reply formatters.
            verbosity: Forwarded to the reply formatters; also limits how many
                solver steps are shown and, at 0.62 or above, adds a second
                hint from a ladder.
            transparency: Forwarded to the reply formatters.
            intent: Explicit intent; when falsy it is detected from the text.
            help_mode: Explicit help mode; when falsy it is derived from the
                resolved intent.
            retrieval_context: Accepted for interface compatibility; not read
                by this method.
            chat_history: Earlier turns, used when choosing the effective
                question and the hint stage.
            question_text: The question being discussed, if known.
            options_text: Answer options, if any.
            question_id: Question identifier; falls back to the session state.
            session_state: State from the previous turn; it is copied, never
                mutated in place.
            **kwargs: Only ``category`` is read.

        Returns:
            A ``SolverResult`` whose ``reply`` holds the formatted text and
            whose ``meta`` records the decision taken (``response_source``),
            the resolved mode and intent, and the updated ``session_state``.
            Answer fields are cleared unless ``meta["can_reveal_answer"]`` is
            True.
        """
        user_text = _clean_text(raw_user_text)
        state = _safe_get_state(session_state)
        input_type = _classify_input_type(user_text)

        effective_question_text, built_on_previous_turn = _choose_effective_question_text(
            raw_user_text=user_text,
            question_text=question_text,
            input_type=input_type,
            state=state,
            chat_history=chat_history,
        )
        if _is_followup_input(input_type):
            built_on_previous_turn = True

        solver_input = _sanitize_question_text(effective_question_text)
        question_id = question_id or state.get("question_id")

        category = normalize_category(kwargs.get("category"))
        classification = classify_question(question_text=solver_input, category=category)
        inferred_category = normalize_category(classification.get("category") or category)
        question_topic = _normalize_classified_topic(
            classification.get("topic"), inferred_category, solver_input
        )

        # A topic query is answered straight from the classifier; none of the
        # solver / explainer / question-support machinery runs for it.
        if input_type == "topic_query":
            reply = _build_topic_query_reply(
                solver_input,
                question_topic,
                classification.get("topic") or "",
                inferred_category,
            )
            result = SolverResult(
                domain="general",
                solved=False,
                help_mode="explain",
                topic=question_topic or "general",
                used_retrieval=False,
                used_generator=False,
                steps=[],
                teaching_chunks=[],
                meta={},
            )
            state = _update_session_state(
                state,
                question_text=solver_input,
                question_id=question_id,
                hint_stage=0,
                user_last_input_type=input_type,
                built_on_previous_turn=built_on_previous_turn,
                help_mode="explain",
                intent="topic_query",
                topic=question_topic,
                category=inferred_category,
            )
            result.reply = format_reply(
                reply,
                tone=tone,
                verbosity=verbosity,
                transparency=transparency,
                help_mode="explain",
                hint_stage=0,
                topic=question_topic,
            )
            result.meta = {
                "response_source": "topic_classifier",
                "help_mode": "explain",
                "intent": "topic_query",
                "question_text": solver_input or "",
                "options_count": len(options_text or []),
                "category": inferred_category if inferred_category else "General",
                "user_last_input_type": input_type,
                "built_on_previous_turn": built_on_previous_turn,
                "session_state": state,
                "used_retrieval": False,
                "used_generator": False,
                "question_support_used": False,
            }
            return result

        # --- Resolve intent and help mode; the input type can override both.
        resolved_intent = intent or detect_intent(user_text, help_mode)
        if input_type == "next_hint":
            resolved_intent = "hint"
        elif input_type == "confusion":
            resolved_intent = "method"
        elif input_type in {"solve", "question"} and resolved_intent in {"hint", "walkthrough", "step_by_step"}:
            resolved_intent = "answer"

        resolved_help_mode = help_mode or intent_to_help_mode(resolved_intent)
        if input_type in {"hint", "next_hint"}:
            resolved_help_mode = "hint"
        elif input_type == "confusion":
            resolved_help_mode = "explain"
        elif resolved_help_mode == "step_by_step":
            resolved_help_mode = "walkthrough"

        hint_stage = _compute_hint_stage(
            input_type,
            self._prior_hint_stage(state),
            _history_hint_stage(chat_history),
        )

        is_quant = bool(solver_input) and (
            inferred_category == "Quantitative" or is_quant_question(solver_input)
        )

        result = SolverResult(
            domain="quant" if is_quant else "general",
            solved=False,
            help_mode=resolved_help_mode,
            topic=question_topic if is_quant else "general",
            used_retrieval=False,
            used_generator=False,
            steps=[],
            teaching_chunks=[],
            meta={},
        )

        # --- Gather candidate material: solver, explainer, question support.
        solver_result: Optional[SolverResult] = None
        if _should_try_solver(is_quant, resolved_help_mode, solver_input):
            try:
                solver_result = route_solver(solver_input)
            except Exception:
                # Best effort: a failing solver means "no solver result".
                solver_result = None
            _apply_safe_step_sanitization(solver_result)

        explainer_result = None
        explainer_understood = False
        explainer_scaffold: Dict[str, Any] = {}
        if solver_input:
            try:
                explainer_result = route_explainer(solver_input)
            except Exception:
                # Best effort: a failing explainer means "no explainer result".
                explainer_result = None
            if explainer_result is not None and getattr(explainer_result, "understood", False):
                explainer_understood = True
                explainer_scaffold = _extract_explainer_scaffold(explainer_result)

        fallback_reply_core = ""
        fallback_pack: Dict[str, Any] = {}
        if solver_input:
            fallback_reply_core, fallback_pack = _build_fallback_reply(
                question_id=question_id,
                question_text=solver_input,
                options_text=options_text,
                topic=question_topic,
                category=inferred_category,
                help_mode=resolved_help_mode,
                hint_stage=hint_stage,
                verbosity=verbosity,
            )

        question_specific_reply_core = _build_question_specific_reply(
            question_text=solver_input,
            options_text=options_text,
            classified_topic=question_topic,
            help_mode=resolved_help_mode,
            input_type=input_type,
            user_text=user_text,
        )

        # --- Adopt the solver result only if its topic fits the classified one.
        if solver_result is not None:
            result.meta = result.meta or {}
            solver_topic = getattr(solver_result, "topic", None) or "unknown"
            compatible_topics = {question_topic, "general_quant", "general", "unknown"}
            compatible_topics.update(self._RELATED_SOLVER_TOPICS.get(question_topic, ()))

            if solver_topic in compatible_topics:
                result = solver_result
                result.domain = "quant"
                result.meta = result.meta or {}
                result.topic = question_topic if question_topic else solver_topic
                result.meta["solver_topic_accepted"] = solver_topic
            else:
                result.meta["solver_topic_rejected"] = solver_topic
                result.meta["solver_topic_expected"] = question_topic
                result.topic = question_topic if is_quant else result.topic
        else:
            result.meta = result.meta or {}
            result.topic = question_topic if is_quant else result.topic

        _apply_safe_step_sanitization(result)

        solver_steps = _get_result_steps(result)
        solver_has_steps = bool(solver_steps)
        prefer_question_support = _should_prefer_question_support(resolved_help_mode, fallback_pack)
        direct_solve_request = _is_direct_solve_request(user_text or solver_input, resolved_intent)
        solver_topic_ok = result.meta.get("solver_topic_rejected") is None
        support_is_strong = _support_pack_is_strong(fallback_pack)

        result.help_mode = resolved_help_mode
        result.meta = result.meta or {}
        result.meta.update(
            {
                "hint_stage": hint_stage,
                "resolved_intent": resolved_intent,
                "input_type": input_type,
                "built_on_previous_turn": built_on_previous_turn,
                "question_topic": question_topic,
                "inferred_category": inferred_category,
                "question_id": question_id,
                "solver_used": solver_result is not None,
                "solver_topic_ok": solver_topic_ok,
                "explainer_used": False,
                "explainer_understood": explainer_understood,
                "question_support_used": False,
                "question_support_topic": fallback_pack.get("topic") if fallback_pack else None,
                "question_support_source": fallback_pack.get("support_source") if fallback_pack else None,
                "question_support_match": fallback_pack.get("support_match") if fallback_pack else None,
                "question_support_strong": support_is_strong,
                "prefer_question_support": prefer_question_support,
                "explainer_scaffold": explainer_scaffold,
            }
        )

        def render(reply_core: str, mode: str = resolved_help_mode) -> str:
            """Format ``reply_core`` with this turn's presentation settings."""
            return format_reply(
                reply_core,
                tone=tone,
                verbosity=verbosity,
                transparency=transparency,
                help_mode=mode,
                hint_stage=hint_stage,
                topic=result.topic,
            )

        # --- Pick exactly one reply source; the first matching branch wins.
        if input_type in {"hint", "next_hint"}:
            hint_lines = self._select_hint_lines(
                hint_stage=hint_stage,
                verbosity=verbosity,
                solver_input=solver_input,
                options_text=options_text,
                question_topic=question_topic,
                inferred_category=inferred_category,
                fallback_pack=fallback_pack,
                fallback_reply_core=fallback_reply_core,
                explainer_scaffold=explainer_scaffold,
            )
            reply_core = "\n".join(f"- {line}" for line in hint_lines if str(line).strip())
            result.meta["response_source"] = "hint_ladder" if support_is_strong else "hint_router"
            self._note_question_support(result.meta, fallback_pack, bool(fallback_pack))
            reply = render(reply_core, "hint")

        elif (
            question_specific_reply_core
            and not (prefer_question_support and fallback_reply_core)
            and (
                _is_help_first_mode(resolved_help_mode)
                or input_type in {"other", "confusion"}
                or any(phrase in _clean_text(user_text).lower() for phrase in self._GUIDANCE_PHRASES)
            )
        ):
            result.meta["response_source"] = "question_specific"
            self._note_question_support(result.meta, fallback_pack, bool(fallback_pack))
            reply = render(question_specific_reply_core)

        elif _is_help_first_mode(resolved_help_mode) and prefer_question_support and fallback_reply_core:
            # Strong question support outranks the explainer in every
            # guidance-first mode, "explain" included.
            result.meta["response_source"] = "question_support"
            self._note_question_support(result.meta, fallback_pack, True)
            reply = render(fallback_reply_core)

        elif resolved_help_mode == "explain" and explainer_understood:
            reply = format_explainer_response(
                result=explainer_result,
                tone=tone,
                verbosity=verbosity,
                transparency=transparency,
                help_mode=resolved_help_mode,
                hint_stage=hint_stage,
            )
            result.meta["response_source"] = "explainer"
            result.meta["explainer_used"] = True
            result.meta["question_support_used"] = False

        elif (
            solver_has_steps
            and solver_topic_ok
            and (
                (resolved_help_mode == "answer" and direct_solve_request)
                or (resolved_help_mode == "walkthrough" and not prefer_question_support)
            )
        ):
            result.meta["response_source"] = "solver_steps"
            result.meta["question_support_used"] = False
            reply = render(_answer_path_from_steps(solver_steps, verbosity=verbosity))

        elif fallback_reply_core:
            result.meta["response_source"] = "question_support" if support_is_strong else "fallback"
            self._note_question_support(result.meta, fallback_pack, bool(fallback_pack))
            reply = render(fallback_reply_core)

        else:
            reply_core = _minimal_generic_reply(inferred_category)
            if not reply_core.startswith("- "):
                reply_core = f"- {reply_core}"
            result.meta["response_source"] = "generic"
            result.meta["question_support_used"] = False
            reply = render(reply_core)

        # --- Never leak an answer in a guidance-first mode or without an
        # explicit direct-solve request.
        help_first = _is_help_first_mode(resolved_help_mode)
        if help_first:
            result.solved = False
            self._withhold_answer(result)

        can_reveal_answer = bool(result.solved and direct_solve_request and not help_first)
        result.meta["can_reveal_answer"] = can_reveal_answer
        if not can_reveal_answer:
            self._withhold_answer(result)

        state = _update_session_state(
            state,
            question_text=solver_input,
            question_id=question_id,
            hint_stage=hint_stage,
            user_last_input_type=input_type,
            built_on_previous_turn=built_on_previous_turn,
            help_mode=resolved_help_mode,
            intent=resolved_intent,
            topic=result.topic,
            category=inferred_category,
        )

        result.reply = reply
        result.help_mode = resolved_help_mode
        result.meta.update(
            {
                "help_mode": resolved_help_mode,
                "intent": resolved_intent,
                "question_text": solver_input or "",
                "options_count": len(options_text or []),
                "category": inferred_category if inferred_category else "General",
                "user_last_input_type": input_type,
                "built_on_previous_turn": built_on_previous_turn,
                "session_state": state,
                "used_retrieval": False,
                "used_generator": False,
            }
        )
        return result