| from __future__ import annotations |
|
|
| import re |
| from dataclasses import dataclass, field |
| from typing import List, Optional |
|
|
|
|
@dataclass
class ParsedQuestion:
    """Structured description of one question, as filled in by the parser.

    Only ``raw_text`` and ``normalized_text`` are required. Every other
    field has a default: ``None`` for the classification fields, a fresh
    empty list for the list fields, and ``False`` for the flags.
    """

    # The question text as supplied, and after normalization.
    raw_text: str
    normalized_text: str

    # High-level classification of the question.
    topic: Optional[str] = None
    asks_for: Optional[str] = None

    # Human-readable notes. ``default_factory`` gives each instance its own
    # list, so instances never share one mutable default.
    givens: List[str] = field(default_factory=list)
    constraints: List[str] = field(default_factory=list)
    relationships: List[str] = field(default_factory=list)
    needed_concepts: List[str] = field(default_factory=list)
    trap_notes: List[str] = field(default_factory=list)

    # Tokens pulled out of the text.
    numbers: List[str] = field(default_factory=list)
    variables: List[str] = field(default_factory=list)

    # Independent per-topic signals; several may be set at once.
    has_percent: bool = False
    has_ratio: bool = False
    has_equation: bool = False
    has_probability: bool = False
    has_geometry: bool = False
    has_statistics: bool = False
    has_number_properties: bool = False
|
|
|
|
| def _normalize_text(text: str) -> str: |
| text = (text or "").strip() |
| text = text.replace("’", "'").replace("“", '"').replace("”", '"') |
| text = re.sub(r"\s+", " ", text) |
| return text |
|
|
|
|
| def _extract_numbers(text: str) -> List[str]: |
| return re.findall(r"\b\d+(?:\.\d+)?%?\b", text) |
|
|
|
|
| def _extract_variables(text: str) -> List[str]: |
| vars_found = re.findall(r"\b[a-z]\b", text.lower()) |
| common_noise = {"a", "i"} |
| return [v for v in vars_found if v not in common_noise] |
|
|
|
|
| def _detect_topic(t: str) -> Optional[str]: |
| if "%" in t or "percent" in t or "percentage" in t: |
| return "percent" |
|
|
| if "ratio" in t or "proportion" in t or re.search(r"\b\d+\s*:\s*\d+\b", t): |
| return "ratio" |
|
|
| if any(word in t for word in ["probability", "chance", "odds", "randomly"]): |
| return "probability" |
|
|
| if any(word in t for word in ["mean", "median", "average", "mode", "standard deviation", "range"]): |
| return "statistics" |
|
|
| if any(word in t for word in ["triangle", "circle", "angle", "area", "perimeter", "radius", "diameter"]): |
| return "geometry" |
|
|
| if any(word in t for word in ["remainder", "divisible", "factor", "multiple", "prime", "integer", "even", "odd"]): |
| return "number_theory" |
|
|
| if "=" in t or re.search(r"\bsolve for\b", t) or re.search(r"\bwhat is [a-z]\b", t): |
| return "algebra" |
|
|
| return None |
|
|
|
|
| def _detect_asks_for(t: str, topic: Optional[str]) -> Optional[str]: |
| lower = t.lower() |
|
|
| m = re.search(r"\bwhat is the value of ([a-z])\b", lower) |
| if m: |
| return f"the value of {m.group(1)}" |
|
|
| m = re.search(r"\bsolve for ([a-z])\b", lower) |
| if m: |
| return f"the value of {m.group(1)}" |
|
|
| if topic == "percent": |
| if "original" in lower or "whole" in lower: |
| return "the original whole value" |
| if "percent" in lower and "of" in lower: |
| return "the missing value in a percent relationship" |
| return "the unknown quantity in the percent relationship" |
|
|
| if topic == "ratio": |
| return "the missing part or total in the ratio relationship" |
|
|
| if topic == "probability": |
| return "the probability of the event" |
|
|
| if topic == "statistics": |
| return "the requested statistic" |
|
|
| if topic == "geometry": |
| return "the missing geometric quantity" |
|
|
| if topic == "number_theory": |
| return "the required number property or unknown integer" |
|
|
| if topic == "algebra": |
| return "the value of the variable" |
|
|
| return "the target quantity asked for in the question" |
|
|
|
|
def _extract_givens(text: str, topic: Optional[str]) -> List[str]:
    """List what the question provides, as short human-readable notes.

    Topic-specific notes come first. A final note listing up to five of
    the numbers found in *text* is appended whenever any number is found,
    whatever the topic.
    """
    notes: List[str] = []

    if topic == "percent":
        percent_values = re.findall(r"\b\d+(?:\.\d+)?%", text)
        if percent_values:
            shown = ", ".join(percent_values[:3])
            notes.append(f"A percentage is given: {shown}")
        if "of" in text.lower():
            notes.append("The wording uses a part-of-whole relationship")
    elif topic == "ratio":
        ratio_values = re.findall(r"\b\d+\s*:\s*\d+\b", text)
        if ratio_values:
            shown = ", ".join(ratio_values[:3])
            notes.append(f"A ratio is given: {shown}")
        if "ratio" in text.lower() or "proportion" in text.lower():
            notes.append("The question involves a comparison between quantities")
    elif topic == "algebra":
        if "=" in text:
            notes.append("An equation is given")
        letters = _extract_variables(text)
        if letters:
            # Deduplicate and sort so the note is stable, then cap at three.
            shown = ", ".join(sorted(set(letters))[:3])
            notes.append(f"A variable appears in the equation: {shown}")
    else:
        # The remaining topics each contribute one fixed note.
        fixed_note = {
            "probability": "The question describes an event and a total set of possible outcomes",
            "statistics": "The question provides values or a distribution to summarize",
            "geometry": "The question gives shape-based information",
            "number_theory": "The question gives number-property information",
        }.get(topic)
        if fixed_note is not None:
            notes.append(fixed_note)

    mentioned = _extract_numbers(text)
    if mentioned:
        notes.append(f"Numbers mentioned: {', '.join(mentioned[:5])}")

    return notes
|
|
|
|
| def _extract_constraints(text: str) -> List[str]: |
| constraints: List[str] = [] |
| lower = text.lower() |
|
|
| phrases = [ |
| "integer", |
| "positive", |
| "negative", |
| "nonnegative", |
| "distinct", |
| "consecutive", |
| "even", |
| "odd", |
| "at least", |
| "at most", |
| "greater than", |
| "less than", |
| "multiple choice", |
| ] |
|
|
| for p in phrases: |
| if p in lower: |
| constraints.append(p) |
|
|
| return constraints |
|
|
|
|
| def _extract_relationships(text: str, topic: Optional[str]) -> List[str]: |
| rel: List[str] = [] |
| lower = text.lower() |
|
|
| if topic == "percent": |
| rel.append("This is a part-percent-whole relationship") |
| if "of" in lower: |
| rel.append("A quantity is being described as a percent of another quantity") |
|
|
| elif topic == "ratio": |
| rel.append("This compares quantities in a fixed proportion") |
| if "total" in lower: |
| rel.append("You may need to connect the ratio parts to a total") |
|
|
| elif topic == "algebra": |
| rel.append("The equal sign means both sides represent the same value") |
| rel.append("You need to isolate the variable while keeping the equation balanced") |
|
|
| elif topic == "probability": |
| rel.append("Probability compares favorable outcomes to total possible outcomes") |
|
|
| elif topic == "statistics": |
| rel.append("You need to match the question to the correct summary measure") |
|
|
| elif topic == "geometry": |
| rel.append("The quantities are linked by properties of the figure") |
|
|
| elif topic == "number_theory": |
| rel.append("The solution depends on a number rule such as divisibility or factors") |
|
|
| return rel |
|
|
|
|
| def _needed_concepts(topic: Optional[str]) -> List[str]: |
| if topic == "percent": |
| return ["percent equation", "part-whole thinking"] |
| if topic == "ratio": |
| return ["ratio structure", "part-to-part or part-to-whole setup"] |
| if topic == "algebra": |
| return ["equation balancing", "inverse operations"] |
| if topic == "probability": |
| return ["favorable outcomes", "total outcomes"] |
| if topic == "statistics": |
| return ["identify the correct statistic", "use the relevant values only"] |
| if topic == "geometry": |
| return ["figure properties", "spatial relationships"] |
| if topic == "number_theory": |
| return ["divisibility/factor rules", "integer properties"] |
| return [] |
|
|
|
|
| def _trap_notes(topic: Optional[str], text: str) -> List[str]: |
| traps: List[str] = [] |
| lower = text.lower() |
|
|
| if topic == "percent": |
| traps.append("Do not confuse the part with the whole") |
| traps.append("Check whether you are solving forward or backward") |
|
|
| elif topic == "ratio": |
| traps.append("Do not add or compare ratio parts inconsistently") |
| traps.append("Check whether the ratio is part-to-part or part-to-whole") |
|
|
| elif topic == "algebra": |
| traps.append("Do not perform an operation on only one side of the equation") |
| traps.append("Watch for distribution or sign mistakes") |
|
|
| elif topic == "probability": |
| traps.append("Do not forget the total number of possible outcomes") |
| traps.append("Check whether order matters") |
|
|
| elif topic == "statistics": |
| traps.append("Do not use the wrong measure") |
| traps.append("Check whether outliers matter") |
|
|
| elif topic == "geometry": |
| traps.append("Do not assume a figure is drawn to scale unless stated") |
| traps.append("Use only relationships actually given") |
|
|
| elif topic == "number_theory": |
| traps.append("Check the exact divisibility or remainder condition") |
| traps.append("Do not assume every integer behaves the same way") |
|
|
| if "except" in lower: |
| traps.append("Watch for exception wording") |
|
|
| return traps |
|
|
|
|
def parse_question(text: str) -> ParsedQuestion:
    """Parse a question into a :class:`ParsedQuestion`.

    The text is normalized once. The detected topic then drives the
    "asks for", givens, relationships, concepts and trap notes, while the
    ``has_*`` flags are computed independently of the topic, so several
    may be set for the same question.

    The flags use whole-word keyword matching (with plural forms), not
    raw substring tests, so that words such as "operation", "seven",
    "model" or "arrange" do not set the ratio, number-property or
    statistics flags.

    Args:
        text: The question as supplied. It is stored unchanged in
            ``raw_text``.

    Returns:
        A populated ``ParsedQuestion``.
    """
    normalized = _normalize_text(text)
    lower = normalized.lower()
    topic = _detect_topic(lower)

    def mentions(pattern: str) -> bool:
        """Report whether *pattern* occurs in the lower-cased question."""
        return re.search(pattern, lower) is not None

    return ParsedQuestion(
        raw_text=text,
        normalized_text=normalized,
        topic=topic,
        asks_for=_detect_asks_for(normalized, topic),
        givens=_extract_givens(normalized, topic),
        constraints=_extract_constraints(normalized),
        relationships=_extract_relationships(normalized, topic),
        needed_concepts=_needed_concepts(topic),
        trap_notes=_trap_notes(topic, normalized),
        numbers=_extract_numbers(normalized),
        variables=_extract_variables(normalized),
        has_percent="%" in lower or mentions(r"\bpercent(?:s|ages?)?\b"),
        has_ratio=mentions(
            r"\b(?:ratios?|proportions?|proportional)\b|\b\d+\s*:\s*\d+\b"
        ),
        has_equation="=" in lower,
        has_probability=mentions(
            r"\b(?:probability|chances?|odds|randomly)\b"
        ),
        # "rectangle" and "semicircle" are listed explicitly because
        # whole-word matching does not reach them through "angle" and
        # "circle".
        has_geometry=mentions(
            r"\b(?:triangles?|rectangles?|(?:semi)?circles?|angles?|areas?"
            r"|perimeters?|radius|radii|diameters?)\b"
        ),
        has_statistics=mentions(
            r"\b(?:means?|medians?|averages?|modes?"
            r"|standard deviations?|ranges?)\b"
        ),
        has_number_properties=mentions(
            r"\b(?:remainders?|divisible|factors?|multiples?|primes?"
            r"|integers?|even(?:ly)?|odd)\b"
        ),
    )