# Spaces: j-js / GameAI (Sleeping)
# File size: 10,307 Bytes
# 0e143c5

from __future__ import annotations

import re
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class ParsedQuestion:
    """Structured result of analysing one question, as built by ``parse_question``.

    Only ``raw_text`` and ``normalized_text`` are required; every other field
    has a default (``None``, an empty list, or ``False``).
    """

    # The text exactly as it was handed to parse_question.
    raw_text: str
    # The result of _normalize_text applied to the raw text.
    normalized_text: str

    # Topic label chosen by _detect_topic ("percent", "ratio", "probability",
    # "statistics", "geometry", "number_theory", "algebra") or None.
    topic: Optional[str] = None
    # Short description of what the question wants, from _detect_asks_for.
    asks_for: Optional[str] = None

    # Human-readable notes produced by the matching _extract_* / _needed_concepts /
    # _trap_notes helpers. Each instance gets its own fresh list.
    givens: List[str] = field(default_factory=list)
    constraints: List[str] = field(default_factory=list)
    relationships: List[str] = field(default_factory=list)
    needed_concepts: List[str] = field(default_factory=list)
    trap_notes: List[str] = field(default_factory=list)

    # Numeric tokens (kept as strings) and single-letter variable names found
    # in the normalized text.
    numbers: List[str] = field(default_factory=list)
    variables: List[str] = field(default_factory=list)

    # Keyword flags set by parse_question from the lower-cased normalized text.
    # Several may be True at once, unlike `topic`, which holds a single label.
    has_percent: bool = False
    has_ratio: bool = False
    has_equation: bool = False
    has_probability: bool = False
    has_geometry: bool = False
    has_statistics: bool = False
    has_number_properties: bool = False


def _normalize_text(text: str) -> str:
    text = (text or "").strip()
    text = text.replace("’", "'").replace("“", '"').replace("”", '"')
    text = re.sub(r"\s+", " ", text)
    return text


def _extract_numbers(text: str) -> List[str]:
    return re.findall(r"\b\d+(?:\.\d+)?%?\b", text)


def _extract_variables(text: str) -> List[str]:
    vars_found = re.findall(r"\b[a-z]\b", text.lower())
    common_noise = {"a", "i"}
    return [v for v in vars_found if v not in common_noise]


def _detect_topic(t: str) -> Optional[str]:
    if "%" in t or "percent" in t or "percentage" in t:
        return "percent"

    if "ratio" in t or "proportion" in t or re.search(r"\b\d+\s*:\s*\d+\b", t):
        return "ratio"

    if any(word in t for word in ["probability", "chance", "odds", "randomly"]):
        return "probability"

    if any(word in t for word in ["mean", "median", "average", "mode", "standard deviation", "range"]):
        return "statistics"

    if any(word in t for word in ["triangle", "circle", "angle", "area", "perimeter", "radius", "diameter"]):
        return "geometry"

    if any(word in t for word in ["remainder", "divisible", "factor", "multiple", "prime", "integer", "even", "odd"]):
        return "number_theory"

    if "=" in t or re.search(r"\bsolve for\b", t) or re.search(r"\bwhat is [a-z]\b", t):
        return "algebra"

    return None


def _detect_asks_for(t: str, topic: Optional[str]) -> Optional[str]:
    lower = t.lower()

    m = re.search(r"\bwhat is the value of ([a-z])\b", lower)
    if m:
        return f"the value of {m.group(1)}"

    m = re.search(r"\bsolve for ([a-z])\b", lower)
    if m:
        return f"the value of {m.group(1)}"

    if topic == "percent":
        if "original" in lower or "whole" in lower:
            return "the original whole value"
        if "percent" in lower and "of" in lower:
            return "the missing value in a percent relationship"
        return "the unknown quantity in the percent relationship"

    if topic == "ratio":
        return "the missing part or total in the ratio relationship"

    if topic == "probability":
        return "the probability of the event"

    if topic == "statistics":
        return "the requested statistic"

    if topic == "geometry":
        return "the missing geometric quantity"

    if topic == "number_theory":
        return "the required number property or unknown integer"

    if topic == "algebra":
        return "the value of the variable"

    return "the target quantity asked for in the question"


def _extract_givens(text: str, topic: Optional[str]) -> List[str]:
    """List short notes about the information the question supplies.

    Args:
        text: The question text.
        topic: The topic label for the question, or ``None``.

    Returns:
        Topic-specific notes first (if any), then a "Numbers mentioned" note
        listing up to five numeric tokens when the text contains numbers.
    """
    lowered = text.lower()
    facts: List[str] = []

    # Topics whose note is a fixed sentence, independent of the wording.
    fixed_note = {
        "probability": "The question describes an event and a total set of possible outcomes",
        "statistics": "The question provides values or a distribution to summarize",
        "geometry": "The question gives shape-based information",
        "number_theory": "The question gives number-property information",
    }

    if topic == "percent":
        percent_tokens = re.findall(r"\b\d+(?:\.\d+)?%", text)
        if percent_tokens:
            facts.append("A percentage is given: " + ", ".join(percent_tokens[:3]))
        if "of" in lowered:
            facts.append("The wording uses a part-of-whole relationship")
    elif topic == "ratio":
        ratio_tokens = re.findall(r"\b\d+\s*:\s*\d+\b", text)
        if ratio_tokens:
            facts.append("A ratio is given: " + ", ".join(ratio_tokens[:3]))
        if "ratio" in lowered or "proportion" in lowered:
            facts.append("The question involves a comparison between quantities")
    elif topic == "algebra":
        if "=" in text:
            facts.append("An equation is given")
        letters = _extract_variables(text)
        if letters:
            # Report at most three distinct letters, alphabetically.
            distinct_letters = sorted(set(letters))[:3]
            facts.append("A variable appears in the equation: " + ", ".join(distinct_letters))
    elif topic in fixed_note:
        facts.append(fixed_note[topic])

    numeric_tokens = _extract_numbers(text)
    if numeric_tokens:
        facts.append("Numbers mentioned: " + ", ".join(numeric_tokens[:5]))

    return facts


def _extract_constraints(text: str) -> List[str]:
    constraints: List[str] = []
    lower = text.lower()

    phrases = [
        "integer",
        "positive",
        "negative",
        "nonnegative",
        "distinct",
        "consecutive",
        "even",
        "odd",
        "at least",
        "at most",
        "greater than",
        "less than",
        "multiple choice",
    ]

    for p in phrases:
        if p in lower:
            constraints.append(p)

    return constraints


def _extract_relationships(text: str, topic: Optional[str]) -> List[str]:
    rel: List[str] = []
    lower = text.lower()

    if topic == "percent":
        rel.append("This is a part-percent-whole relationship")
        if "of" in lower:
            rel.append("A quantity is being described as a percent of another quantity")

    elif topic == "ratio":
        rel.append("This compares quantities in a fixed proportion")
        if "total" in lower:
            rel.append("You may need to connect the ratio parts to a total")

    elif topic == "algebra":
        rel.append("The equal sign means both sides represent the same value")
        rel.append("You need to isolate the variable while keeping the equation balanced")

    elif topic == "probability":
        rel.append("Probability compares favorable outcomes to total possible outcomes")

    elif topic == "statistics":
        rel.append("You need to match the question to the correct summary measure")

    elif topic == "geometry":
        rel.append("The quantities are linked by properties of the figure")

    elif topic == "number_theory":
        rel.append("The solution depends on a number rule such as divisibility or factors")

    return rel


def _needed_concepts(topic: Optional[str]) -> List[str]:
    if topic == "percent":
        return ["percent equation", "part-whole thinking"]
    if topic == "ratio":
        return ["ratio structure", "part-to-part or part-to-whole setup"]
    if topic == "algebra":
        return ["equation balancing", "inverse operations"]
    if topic == "probability":
        return ["favorable outcomes", "total outcomes"]
    if topic == "statistics":
        return ["identify the correct statistic", "use the relevant values only"]
    if topic == "geometry":
        return ["figure properties", "spatial relationships"]
    if topic == "number_theory":
        return ["divisibility/factor rules", "integer properties"]
    return []


def _trap_notes(topic: Optional[str], text: str) -> List[str]:
    traps: List[str] = []
    lower = text.lower()

    if topic == "percent":
        traps.append("Do not confuse the part with the whole")
        traps.append("Check whether you are solving forward or backward")

    elif topic == "ratio":
        traps.append("Do not add or compare ratio parts inconsistently")
        traps.append("Check whether the ratio is part-to-part or part-to-whole")

    elif topic == "algebra":
        traps.append("Do not perform an operation on only one side of the equation")
        traps.append("Watch for distribution or sign mistakes")

    elif topic == "probability":
        traps.append("Do not forget the total number of possible outcomes")
        traps.append("Check whether order matters")

    elif topic == "statistics":
        traps.append("Do not use the wrong measure")
        traps.append("Check whether outliers matter")

    elif topic == "geometry":
        traps.append("Do not assume a figure is drawn to scale unless stated")
        traps.append("Use only relationships actually given")

    elif topic == "number_theory":
        traps.append("Check the exact divisibility or remainder condition")
        traps.append("Do not assume every integer behaves the same way")

    if "except" in lower:
        traps.append("Watch for exception wording")

    return traps


def parse_question(text: str) -> ParsedQuestion:
    """Analyse a question and return everything the helpers can extract from it.

    The text is normalized once; the topic is detected from the lower-cased
    normalized text, and the remaining helpers receive the normalized text
    plus the topic where they need it.

    Args:
        text: The raw question text.

    Returns:
        A fully populated ``ParsedQuestion``. ``raw_text`` holds *text*
        unchanged.
    """
    normalized = _normalize_text(text)
    lower = normalized.lower()
    topic = _detect_topic(lower)

    def mentions_any(*keywords: str) -> bool:
        # Plain substring test against the lower-cased normalized text.
        for keyword in keywords:
            if keyword in lower:
                return True
        return False

    # A numeric "a:b" pattern counts as a ratio even without the word itself.
    has_colon_ratio = re.search(r"\b\d+\s*:\s*\d+\b", lower) is not None

    parsed = ParsedQuestion(raw_text=text, normalized_text=normalized)
    parsed.topic = topic
    parsed.asks_for = _detect_asks_for(normalized, topic)
    parsed.givens = _extract_givens(normalized, topic)
    parsed.constraints = _extract_constraints(normalized)
    parsed.relationships = _extract_relationships(normalized, topic)
    parsed.needed_concepts = _needed_concepts(topic)
    parsed.trap_notes = _trap_notes(topic, normalized)
    parsed.numbers = _extract_numbers(normalized)
    parsed.variables = _extract_variables(normalized)

    parsed.has_percent = mentions_any("%", "percent", "percentage")
    parsed.has_ratio = mentions_any("ratio", "proportion") or has_colon_ratio
    parsed.has_equation = "=" in lower
    parsed.has_probability = mentions_any("probability", "chance", "odds", "randomly")
    parsed.has_geometry = mentions_any(
        "triangle", "circle", "angle", "area", "perimeter", "radius", "diameter"
    )
    parsed.has_statistics = mentions_any(
        "mean", "median", "average", "mode", "standard deviation", "range"
    )
    parsed.has_number_properties = mentions_any(
        "remainder", "divisible", "factor", "multiple", "prime", "integer", "even", "odd"
    )
    return parsed