File size: 10,307 Bytes
0e143c5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 | from __future__ import annotations
import re
from dataclasses import dataclass, field
from typing import List, Optional
@dataclass
class ParsedQuestion:
    """Structured view of one question, as assembled by ``parse_question``.

    Only ``raw_text`` and ``normalized_text`` are required.  Every other
    field defaults to ``None``, an empty list, or ``False``, and each list
    field gets its own fresh list per instance.
    """

    # The text as supplied and the cleaned-up form the analysis runs on.
    raw_text: str
    normalized_text: str

    # Single topic label and a short description of the target quantity.
    topic: Optional[str] = None
    asks_for: Optional[str] = None

    # Human-readable notes produced by the individual extractors.
    givens: List[str] = field(default_factory=list)
    constraints: List[str] = field(default_factory=list)
    relationships: List[str] = field(default_factory=list)
    needed_concepts: List[str] = field(default_factory=list)
    trap_notes: List[str] = field(default_factory=list)

    # Raw tokens pulled from the text (kept as strings).
    numbers: List[str] = field(default_factory=list)
    variables: List[str] = field(default_factory=list)

    # Independent keyword flags; more than one may be true for a question.
    has_percent: bool = False
    has_ratio: bool = False
    has_equation: bool = False
    has_probability: bool = False
    has_geometry: bool = False
    has_statistics: bool = False
    has_number_properties: bool = False
def _normalize_text(text: str) -> str:
text = (text or "").strip()
text = text.replace("’", "'").replace("“", '"').replace("”", '"')
text = re.sub(r"\s+", " ", text)
return text
def _extract_numbers(text: str) -> List[str]:
return re.findall(r"\b\d+(?:\.\d+)?%?\b", text)
def _extract_variables(text: str) -> List[str]:
vars_found = re.findall(r"\b[a-z]\b", text.lower())
common_noise = {"a", "i"}
return [v for v in vars_found if v not in common_noise]
def _detect_topic(t: str) -> Optional[str]:
if "%" in t or "percent" in t or "percentage" in t:
return "percent"
if "ratio" in t or "proportion" in t or re.search(r"\b\d+\s*:\s*\d+\b", t):
return "ratio"
if any(word in t for word in ["probability", "chance", "odds", "randomly"]):
return "probability"
if any(word in t for word in ["mean", "median", "average", "mode", "standard deviation", "range"]):
return "statistics"
if any(word in t for word in ["triangle", "circle", "angle", "area", "perimeter", "radius", "diameter"]):
return "geometry"
if any(word in t for word in ["remainder", "divisible", "factor", "multiple", "prime", "integer", "even", "odd"]):
return "number_theory"
if "=" in t or re.search(r"\bsolve for\b", t) or re.search(r"\bwhat is [a-z]\b", t):
return "algebra"
return None
def _detect_asks_for(t: str, topic: Optional[str]) -> Optional[str]:
lower = t.lower()
m = re.search(r"\bwhat is the value of ([a-z])\b", lower)
if m:
return f"the value of {m.group(1)}"
m = re.search(r"\bsolve for ([a-z])\b", lower)
if m:
return f"the value of {m.group(1)}"
if topic == "percent":
if "original" in lower or "whole" in lower:
return "the original whole value"
if "percent" in lower and "of" in lower:
return "the missing value in a percent relationship"
return "the unknown quantity in the percent relationship"
if topic == "ratio":
return "the missing part or total in the ratio relationship"
if topic == "probability":
return "the probability of the event"
if topic == "statistics":
return "the requested statistic"
if topic == "geometry":
return "the missing geometric quantity"
if topic == "number_theory":
return "the required number property or unknown integer"
if topic == "algebra":
return "the value of the variable"
return "the target quantity asked for in the question"
def _extract_givens(text: str, topic: Optional[str]) -> List[str]:
    """List short notes about the information the question supplies.

    Topic-specific notes come first.  A final "Numbers mentioned" note is
    appended whenever ``_extract_numbers`` finds anything, showing at most
    the first five values.
    """
    found: List[str] = []

    if topic == "percent":
        pct_values = re.findall(r"\b\d+(?:\.\d+)?%", text)
        if pct_values:
            found.append(f"A percentage is given: {', '.join(pct_values[:3])}")
        if "of" in text.lower():
            found.append("The wording uses a part-of-whole relationship")
    elif topic == "ratio":
        ratio_values = re.findall(r"\b\d+\s*:\s*\d+\b", text)
        if ratio_values:
            found.append(f"A ratio is given: {', '.join(ratio_values[:3])}")
        lowered = text.lower()
        if "ratio" in lowered or "proportion" in lowered:
            found.append("The question involves a comparison between quantities")
    elif topic == "algebra":
        if "=" in text:
            found.append("An equation is given")
        letters = _extract_variables(text)
        if letters:
            # De-duplicate and sort so the note is stable; show at most three.
            found.append(f"A variable appears in the equation: {', '.join(sorted(set(letters))[:3])}")
    else:
        # The remaining topics each contribute one fixed note.
        fixed_note = {
            "probability": "The question describes an event and a total set of possible outcomes",
            "statistics": "The question provides values or a distribution to summarize",
            "geometry": "The question gives shape-based information",
            "number_theory": "The question gives number-property information",
        }.get(topic)
        if fixed_note is not None:
            found.append(fixed_note)

    mentioned = _extract_numbers(text)
    if mentioned:
        found.append(f"Numbers mentioned: {', '.join(mentioned[:5])}")
    return found
def _extract_constraints(text: str) -> List[str]:
constraints: List[str] = []
lower = text.lower()
phrases = [
"integer",
"positive",
"negative",
"nonnegative",
"distinct",
"consecutive",
"even",
"odd",
"at least",
"at most",
"greater than",
"less than",
"multiple choice",
]
for p in phrases:
if p in lower:
constraints.append(p)
return constraints
def _extract_relationships(text: str, topic: Optional[str]) -> List[str]:
rel: List[str] = []
lower = text.lower()
if topic == "percent":
rel.append("This is a part-percent-whole relationship")
if "of" in lower:
rel.append("A quantity is being described as a percent of another quantity")
elif topic == "ratio":
rel.append("This compares quantities in a fixed proportion")
if "total" in lower:
rel.append("You may need to connect the ratio parts to a total")
elif topic == "algebra":
rel.append("The equal sign means both sides represent the same value")
rel.append("You need to isolate the variable while keeping the equation balanced")
elif topic == "probability":
rel.append("Probability compares favorable outcomes to total possible outcomes")
elif topic == "statistics":
rel.append("You need to match the question to the correct summary measure")
elif topic == "geometry":
rel.append("The quantities are linked by properties of the figure")
elif topic == "number_theory":
rel.append("The solution depends on a number rule such as divisibility or factors")
return rel
def _needed_concepts(topic: Optional[str]) -> List[str]:
if topic == "percent":
return ["percent equation", "part-whole thinking"]
if topic == "ratio":
return ["ratio structure", "part-to-part or part-to-whole setup"]
if topic == "algebra":
return ["equation balancing", "inverse operations"]
if topic == "probability":
return ["favorable outcomes", "total outcomes"]
if topic == "statistics":
return ["identify the correct statistic", "use the relevant values only"]
if topic == "geometry":
return ["figure properties", "spatial relationships"]
if topic == "number_theory":
return ["divisibility/factor rules", "integer properties"]
return []
def _trap_notes(topic: Optional[str], text: str) -> List[str]:
traps: List[str] = []
lower = text.lower()
if topic == "percent":
traps.append("Do not confuse the part with the whole")
traps.append("Check whether you are solving forward or backward")
elif topic == "ratio":
traps.append("Do not add or compare ratio parts inconsistently")
traps.append("Check whether the ratio is part-to-part or part-to-whole")
elif topic == "algebra":
traps.append("Do not perform an operation on only one side of the equation")
traps.append("Watch for distribution or sign mistakes")
elif topic == "probability":
traps.append("Do not forget the total number of possible outcomes")
traps.append("Check whether order matters")
elif topic == "statistics":
traps.append("Do not use the wrong measure")
traps.append("Check whether outliers matter")
elif topic == "geometry":
traps.append("Do not assume a figure is drawn to scale unless stated")
traps.append("Use only relationships actually given")
elif topic == "number_theory":
traps.append("Check the exact divisibility or remainder condition")
traps.append("Do not assume every integer behaves the same way")
if "except" in lower:
traps.append("Watch for exception wording")
return traps
def parse_question(text: str) -> ParsedQuestion:
    """Parse a question string into a ``ParsedQuestion``.

    The text is normalized once.  A single topic is detected from the
    lowercased form, and each extractor then fills its own field from the
    normalized text.  ``raw_text`` keeps the argument exactly as passed.

    The ``has_*`` flags are computed independently of the single ``topic``,
    so several can be true for one question.  Their keywords are matched as
    whole words (an optional plural "s" is allowed) rather than as raw
    substrings, so "seven" does not set ``has_number_properties`` through
    "even" and "operation" does not set ``has_ratio`` through "ratio".
    """
    normalized = _normalize_text(text)
    lower = normalized.lower()
    topic = _detect_topic(lower)

    def mentions(*keywords: str) -> bool:
        # True when any keyword occurs as a whole word (plural "s" allowed).
        alternatives = "|".join(re.escape(word) for word in keywords)
        return re.search(rf"\b(?:{alternatives})s?\b", lower) is not None

    return ParsedQuestion(
        raw_text=text,
        normalized_text=normalized,
        topic=topic,
        asks_for=_detect_asks_for(normalized, topic),
        givens=_extract_givens(normalized, topic),
        constraints=_extract_constraints(normalized),
        relationships=_extract_relationships(normalized, topic),
        needed_concepts=_needed_concepts(topic),
        trap_notes=_trap_notes(topic, normalized),
        numbers=_extract_numbers(normalized),
        variables=_extract_variables(normalized),
        has_percent="%" in lower or mentions("percent", "percentage"),
        # A colon between two integers ("3:2") also counts as a ratio.
        has_ratio=(
            mentions("ratio", "proportion", "proportional")
            or bool(re.search(r"\b\d+\s*:\s*\d+\b", lower))
        ),
        has_equation="=" in lower,
        has_probability=mentions("probability", "chance", "odds", "randomly"),
        has_geometry=mentions(
            "triangle", "circle", "angle", "area", "perimeter", "radius", "diameter"
        ),
        has_statistics=mentions(
            "mean", "median", "average", "mode", "standard deviation", "range"
        ),
        has_number_properties=mentions(
            "remainder", "divisible", "factor", "multiple", "prime", "integer", "even", "odd"
        ),
    )