GameAI / generate_question_support.py
j-js's picture
Update generate_question_support.py
18361f1 verified
import json
import re
from pathlib import Path
# JSON document holding the question set that main() loads.
INPUT_PATH = Path("data") / "gmat_questions.json"
# JSON Lines file that main() writes, one support record per line.
OUTPUT_PATH = Path("data") / "question_support_bank.jsonl"
# ----------------------------
# Utilities
# ----------------------------
def extract_numbers(text):
    """Return every unsigned numeric literal in ``text``, in order of appearance.

    Integers ("42") and decimals ("3.14") are returned as strings. A decimal
    point is only treated as part of a number when digits follow it, so a
    sentence-ending period ("... is 5.") is not glued onto the number.
    """
    return re.findall(r"\d+(?:\.\d+)?", text)
def detect_topic(q):
    """Classify a question into a coarse topic label from its wording.

    ``q`` is a mapping with a ``"questionText"`` string. The text is
    lower-cased and checked against the rules below in order; the first rule
    that matches wins. Returns one of ``"percent"``, ``"ratio"``,
    ``"probability"``, ``"statistics"``, ``"geometry"``, ``"algebra"`` or
    ``"general"``.

    Raises ``KeyError`` if ``q`` has no ``"questionText"`` entry.
    """
    text = q["questionText"].lower()

    if "%" in text or "percent" in text:
        return "percent"

    # Match "ratio" as a whole word so that "duration", "operation" or
    # "rational" do not count, and only treat a colon as a ratio when it sits
    # between digits ("3:4") rather than being ordinary punctuation.
    if re.search(r"\bratios?\b|\d\s*:\s*\d", text):
        return "ratio"

    if "probability" in text or "chance" in text:
        return "probability"

    # "mean" and "data" must be standalone words ("means", "database" are not
    # statistics cues); the other keywords may carry a suffix ("averaged").
    if re.search(r"\bmean\b|\baverage|\bmedian|\bdata\b|\bvariance", text):
        return "statistics"

    if any(word in text for word in ("area", "circle", "triangle", "perimeter")):
        return "geometry"

    # A letter followed by an arithmetic operator suggests an expression.
    # A hyphen directly followed by two or more letters is taken to be a
    # hyphenated word ("two-digit") rather than a subtraction.
    if re.search(r"[a-z]\s*(?:[+*/=]|-(?![a-z]{2}))", text):
        return "algebra"

    return "general"
# ----------------------------
# Smart Templates
# ----------------------------
def percent_template(q, nums):
    """Return the fixed support content used for percent questions.

    ``q`` and ``nums`` are accepted so every template shares one signature;
    this template does not read either of them.
    """
    steps = [
        "Start with an easy base value (like 100).",
        "Apply the first percentage change.",
        "Apply the second change to the new value.",
        "Compare the result with the original.",
    ]
    rationale = [
        "Percent changes are multiplicative, not additive.",
        "Each change affects the updated value.",
        "Using 100 simplifies calculations.",
    ]
    # Keyword order fixes the key order of the resulting dict.
    return dict(
        first_step="Treat the original value as 100 unless a specific number is easier.",
        hint_1="Focus on how the percentage is applied — is it increase, decrease, or part of a whole?",
        hint_2="Convert the percentage into a multiplier (e.g. +20% → ×1.2, -20% → ×0.8).",
        hint_3="Apply each percentage step in order — don’t combine them directly.",
        walkthrough_steps=steps,
        method_explanation=rationale,
        common_trap="Adding/subtracting percentages directly instead of applying sequential changes.",
    )
# One operand of a simple arithmetic expression: a number with an optional
# variable suffix ("7", "3.5", "2x") or a bare name ("x", "total"), optionally
# raised to an integer power ("x^2").
_ALGEBRA_OPERAND = r"(?:\d+(?:\.\d+)?[A-Za-z]*|[A-Za-z]+)(?:\^\d+)?"
# Operands joined by + - * /, with an optional leading sign.
_ALGEBRA_EXPR = rf"[-+]?\s*{_ALGEBRA_OPERAND}(?:\s*[-+*/]\s*{_ALGEBRA_OPERAND})*"
# "<expr> = <expr>". The look-behind stops a match from starting in the middle
# of a word or number; "(?!=)" rejects "==".
_ALGEBRA_EQUATION_RE = re.compile(
    rf"(?<![A-Za-z0-9.])({_ALGEBRA_EXPR})\s*=(?!=)\s*({_ALGEBRA_EXPR})"
)


def algebra_template(q, nums):
    """Return support content for algebra questions.

    ``q`` is a mapping with a ``"questionText"`` string; ``nums`` is accepted
    for signature parity with the other templates and is not used.

    When the question text contains a simple equation (arithmetic expressions
    on both sides of a single ``=``), only that equation is quoted in the
    first step and walkthrough, without the prose around it. For example
    "If 2x + 3 = 7, what is x?" yields "2x + 3 = 7". Text with no such
    equation, including inequalities like ``>=`` or expressions with
    parentheses, gets the generic fallback content.

    Raises ``KeyError`` if ``q`` has no ``"questionText"`` entry.
    """
    text = q["questionText"]

    match = _ALGEBRA_EQUATION_RE.search(text)
    if match:
        # A match may begin on the whitespace before the expression.
        lhs = match.group(1).strip()
        rhs = match.group(2).strip()
        return {
            "first_step": f"Start with the equation: {lhs} = {rhs}",
            "hint_1": "Focus on isolating the variable.",
            "hint_2": "Undo addition/subtraction first.",
            "hint_3": "Then undo multiplication/division.",
            "walkthrough_steps": [
                f"Start with: {lhs} = {rhs}",
                "Move constants to one side.",
                "Undo multiplication/division.",
                "Solve for the variable.",
            ],
            "method_explanation": [
                "Solve by isolating the variable.",
                "Reverse operations step by step.",
                "Keep both sides balanced.",
            ],
            "common_trap": "Forgetting to reverse operations in the correct order.",
        }

    # No recognisable equation in the text: fall back to generic guidance.
    return {
        "first_step": "Identify the variable and isolate it step by step.",
        "hint_1": "Look at what operations are applied to the variable.",
        "hint_2": "Undo operations in reverse order.",
        "hint_3": "Keep both sides balanced while simplifying.",
        "walkthrough_steps": [
            "Identify the variable.",
            "Move constants to one side.",
            "Undo multiplication/division.",
            "Simplify to isolate the variable.",
        ],
        "method_explanation": [
            "Algebra problems require isolating the variable.",
            "Reverse operations systematically.",
        ],
        "common_trap": "Forgetting to apply operations to both sides.",
    }
def ratio_template(q, nums):
    """Return the fixed support content used for ratio questions.

    ``q`` and ``nums`` are accepted so every template shares one signature;
    this template does not read either of them.
    """
    steps = [
        "Write ratio as parts.",
        "Sum the parts.",
        "Divide total by parts.",
        "Multiply by needed portion.",
    ]
    rationale = [
        "Ratios represent proportional relationships.",
        "Breaking into equal units simplifies reasoning.",
    ]
    # Keyword order fixes the key order of the resulting dict.
    return dict(
        first_step="Break the ratio into total parts.",
        hint_1="Add the ratio parts together.",
        hint_2="Find the value of one part.",
        hint_3="Scale up to get the required quantity.",
        walkthrough_steps=steps,
        method_explanation=rationale,
        common_trap="Using ratio numbers directly instead of total parts.",
    )
def probability_template(q, nums):
    """Return the fixed support content used for probability questions.

    ``q`` and ``nums`` are accepted so every template shares one signature;
    this template does not read either of them.
    """
    steps = [
        "Count total outcomes.",
        "Count favorable outcomes.",
        "Divide favorable by total.",
    ]
    rationale = [
        "Probability is a ratio.",
        "Clear counting is essential.",
    ]
    # Keyword order fixes the key order of the resulting dict.
    return dict(
        first_step="Count total outcomes and favorable outcomes.",
        hint_1="How many total possibilities are there?",
        hint_2="How many meet the condition?",
        hint_3="Probability = favorable / total.",
        walkthrough_steps=steps,
        method_explanation=rationale,
        common_trap="Incorrect counting of outcomes.",
    )
def statistics_template(q, nums):
    """Return the fixed support content used for statistics questions.

    ``q`` and ``nums`` are accepted so every template shares one signature;
    this template does not read either of them.
    """
    steps = [
        "List values.",
        "Apply formula (mean, median, etc.).",
        "Compute carefully.",
    ]
    rationale = [
        "Different measures describe data differently.",
        "Mean = sum / count.",
    ]
    # Keyword order fixes the key order of the resulting dict.
    return dict(
        first_step="Identify what measure is being asked (mean, median, etc.).",
        hint_1="Write out the numbers clearly.",
        hint_2="Apply the correct formula.",
        hint_3="Check your calculation.",
        walkthrough_steps=steps,
        method_explanation=rationale,
        common_trap="Using the wrong measure.",
    )
def geometry_template(q, nums):
    """Return the fixed support content used for geometry questions.

    ``q`` and ``nums`` are accepted so every template shares one signature;
    this template does not read either of them.
    """
    steps = [
        "Identify formula.",
        "Substitute values.",
        "Compute result.",
    ]
    rationale = [
        "Geometry relies on standard formulas.",
        "Careful substitution avoids mistakes.",
    ]
    # Keyword order fixes the key order of the resulting dict.
    return dict(
        first_step="Identify the shape and formula needed.",
        hint_1="Recall the relevant formula.",
        hint_2="Substitute values carefully.",
        hint_3="Solve step by step.",
        walkthrough_steps=steps,
        method_explanation=rationale,
        common_trap="Using the wrong formula.",
    )
def general_template(q, nums):
    """Return the fixed support content used when no specific topic applies.

    ``q`` and ``nums`` are accepted so every template shares one signature;
    this template does not read either of them.
    """
    steps = [
        "Understand the problem.",
        "Identify variables.",
        "Set up relationships.",
        "Solve step by step.",
    ]
    rationale = [
        "Translate words into math.",
        "Solve systematically.",
    ]
    # Keyword order fixes the key order of the resulting dict.
    return dict(
        first_step="Break the question into known and unknown parts.",
        hint_1="What is being asked?",
        hint_2="What information is given?",
        hint_3="How can you link them mathematically?",
        walkthrough_steps=steps,
        method_explanation=rationale,
        common_trap="Misinterpreting the question.",
    )
# ----------------------------
# Router
# ----------------------------
def generate_support(q):
    """Build the full support record for one question.

    ``q`` is a mapping that must provide ``"id"``, ``"questionText"``,
    ``"answers"`` and ``"correctIndex"``. The topic from ``detect_topic``
    selects a template; any topic without a dedicated template uses
    ``general_template``. The returned dict holds the question's identifying
    fields followed by the template's entries.
    """
    numbers = extract_numbers(q["questionText"])
    topic = detect_topic(q)

    builders = {
        "percent": percent_template,
        "algebra": algebra_template,
        "ratio": ratio_template,
        "probability": probability_template,
        "statistics": statistics_template,
        "geometry": geometry_template,
    }
    build = builders.get(topic, general_template)
    content = build(q, numbers)

    record = {
        "question_id": q["id"],
        "topic": topic,
        "stem": q["questionText"],
        "choices": q["answers"],
        "correct_option": q["correctIndex"],  # internal use
    }
    # Template entries come after the identifying fields.
    record.update(content)
    return record
# ----------------------------
# Main
# ----------------------------
def main():
    """Generate the support bank file from the question file.

    Loads the JSON document at ``INPUT_PATH``, takes its ``"items"`` list,
    and writes one JSON-encoded support record per line to ``OUTPUT_PATH``,
    then prints a confirmation message.
    """
    with INPUT_PATH.open("r", encoding="utf-8") as src:
        payload = json.load(src)
    items = payload["items"]

    with OUTPUT_PATH.open("w", encoding="utf-8") as sink:
        sink.writelines(
            json.dumps(generate_support(item)) + "\n" for item in items
        )

    print(f"Generated support bank → {OUTPUT_PATH}")


# Run the generator only when executed as a script, not on import.
if __name__ == "__main__":
    main()