Spaces:

j-js
/

GameAI

Sleeping

App Files Files Community

j-js committed 16 days ago

Commit

c5d6983

verified ·

1 Parent(s): df209d2

Create generate_question_support.py

Browse files

Files changed (1) hide show

generate_question_support.py +232 -0

generate_question_support.py ADDED Viewed

	@@ -0,0 +1,232 @@

+import json
+import re
+from pathlib import Path
+INPUT_PATH = Path("data/gmat_questions.json")
+OUTPUT_PATH = Path("data/question_support_bank.jsonl")
+# ----------------------------
+# Utilities
+# ----------------------------
def extract_numbers(text):
    """Return every unsigned numeric literal in *text*, in order of appearance.

    Args:
        text: The string to scan.

    Returns:
        A list of the matched substrings, e.g. ``["20", "3.5"]``. Integers
        and decimals are both returned as strings; signs are not captured.
    """
    # A dot is only part of the number when digits follow it, so the period
    # that ends a sentence ("... costs 5.") is not glued onto the match.
    return re.findall(r"\d+(?:\.\d+)?", text)
# Ordered (topic, compiled pattern) pairs; the first pattern that matches wins.
# Keywords are anchored on word boundaries so that e.g. "duration" is not read
# as "ratio" and "meaning" is not read as "mean".
_TOPIC_PATTERNS = tuple(
    (name, re.compile(pattern))
    for name, pattern in (
        # Plain substring on purpose: also covers "percentage"/"percentile".
        ("percent", r"%|percent"),
        # A colon only signals a ratio when it sits between digits ("3:2"),
        # not when it is ordinary punctuation ("Solve for x: ...").
        ("ratio", r"\bratios?\b|\d\s*:\s*\d"),
        ("probability", r"\bprobabilit(?:y|ies)\b|\bchances?\b"),
        ("statistics", r"\b(?:means?|averages?|medians?|data|variances?)\b"),
        ("geometry", r"\b(?:areas?|(?:semi)?circles?|triangles?|perimeters?)\b"),
        # A letter followed by an operator. A hyphen only counts when the
        # letter before it stands alone ("x-y") or no letter follows it
        # ("x - 3"), so hyphenated words like "well-known" are ignored.
        ("algebra", r"(?<![a-z])[a-z]\s*-|[a-z]\s*(?:[+*/=]|-(?![a-z]))"),
    )
)


def detect_topic(q):
    """Classify a question into a coarse topic from its text.

    The lower-cased question text is tested against the patterns in
    ``_TOPIC_PATTERNS`` in order; the first match decides the topic.

    Args:
        q: Mapping with a ``"questionText"`` string.

    Returns:
        One of ``"percent"``, ``"ratio"``, ``"probability"``,
        ``"statistics"``, ``"geometry"``, ``"algebra"`` or ``"general"``
        (the fallback when nothing matches).

    Raises:
        KeyError: If *q* has no ``"questionText"`` entry.
    """
    text = q["questionText"].lower()
    for topic, pattern in _TOPIC_PATTERNS:
        if pattern.search(text):
            return topic
    return "general"
+# ----------------------------
+# Smart Templates
+# ----------------------------
def percent_template(q, nums):
    """Build the fixed support content for percent questions.

    Neither *q* nor *nums* is read; the returned text is the same for every
    call. A new dict (with new lists) is created each time.

    Returns:
        Dict with the keys ``first_step``, ``hint_1``..``hint_3``,
        ``walkthrough_steps``, ``method_explanation`` and ``common_trap``,
        in that order.
    """
    walkthrough = [
        "Start with an easy base value (like 100).",
        "Apply the first percentage change.",
        "Apply the second change to the new value.",
        "Compare the result with the original.",
    ]
    rationale = [
        "Percent changes are multiplicative, not additive.",
        "Each change affects the updated value.",
        "Using 100 simplifies calculations.",
    ]
    return dict(
        first_step="Treat the original value as 100 unless a specific number is easier.",
        hint_1="Focus on how the percentage is applied — is it increase, decrease, or part of a whole?",
        hint_2="Convert the percentage into a multiplier (e.g. +20% → ×1.2, -20% → ×0.8).",
        hint_3="Apply each percentage step in order — don’t combine them directly.",
        walkthrough_steps=walkthrough,
        method_explanation=rationale,
        common_trap="Adding/subtracting percentages directly instead of applying sequential changes.",
    )
def algebra_template(q, nums):
    """Build the support content for algebra questions.

    ``hint_1`` quotes the start of the question text. The quote is limited
    to 50 characters and is followed by ``...`` only when the text was
    actually cut, so short stems are shown in full without a misleading
    ellipsis.

    Args:
        q: Mapping with a ``"questionText"`` string.
        nums: Unused; accepted to match the other template functions.

    Returns:
        Dict with the keys ``first_step``, ``hint_1``..``hint_3``,
        ``walkthrough_steps``, ``method_explanation`` and ``common_trap``,
        in that order.

    Raises:
        KeyError: If *q* has no ``"questionText"`` entry.
    """
    stem = q["questionText"]
    preview_limit = 50
    # Mark the preview as truncated only when something was dropped.
    if len(stem) > preview_limit:
        preview = stem[:preview_limit] + "..."
    else:
        preview = stem
    return {
        "first_step": "Write the equation clearly and identify the variable.",
        "hint_1": f"Look at the structure: {preview}",
        "hint_2": "Undo operations in reverse order.",
        "hint_3": "Keep both sides balanced while isolating the variable.",
        "walkthrough_steps": [
            "Identify the equation.",
            "Move constants to one side.",
            "Undo multiplication/division.",
            "Solve for the variable.",
        ],
        "method_explanation": [
            "Solve by isolating the variable step by step.",
            "Reverse operations carefully.",
            "Check your result by substitution.",
        ],
        "common_trap": "Forgetting to apply operations to both sides.",
    }
def ratio_template(q, nums):
    """Build the fixed support content for ratio questions.

    Neither *q* nor *nums* is read. Returns a new dict with the keys
    ``first_step``, ``hint_1``..``hint_3``, ``walkthrough_steps``,
    ``method_explanation`` and ``common_trap``, in that order.
    """
    support = {"first_step": "Break the ratio into total parts."}
    hints = (
        "Add the ratio parts together.",
        "Find the value of one part.",
        "Scale up to get the required quantity.",
    )
    # Hints are numbered from 1: hint_1, hint_2, hint_3.
    for position, hint in enumerate(hints, start=1):
        support[f"hint_{position}"] = hint
    support["walkthrough_steps"] = [
        "Write ratio as parts.",
        "Sum the parts.",
        "Divide total by parts.",
        "Multiply by needed portion.",
    ]
    support["method_explanation"] = [
        "Ratios represent proportional relationships.",
        "Breaking into equal units simplifies reasoning.",
    ]
    support["common_trap"] = "Using ratio numbers directly instead of total parts."
    return support
def probability_template(q, nums):
    """Build the fixed support content for probability questions.

    Neither *q* nor *nums* is read. Returns a new dict with the keys
    ``first_step``, ``hint_1``..``hint_3``, ``walkthrough_steps``,
    ``method_explanation`` and ``common_trap``, in that order.
    """
    counting_steps = [
        "Count total outcomes.",
        "Count favorable outcomes.",
        "Divide favorable by total.",
    ]
    why_it_works = ["Probability is a ratio.", "Clear counting is essential."]
    return dict(
        first_step="Count total outcomes and favorable outcomes.",
        hint_1="How many total possibilities are there?",
        hint_2="How many meet the condition?",
        hint_3="Probability = favorable / total.",
        walkthrough_steps=counting_steps,
        method_explanation=why_it_works,
        common_trap="Incorrect counting of outcomes.",
    )
def statistics_template(q, nums):
    """Build the fixed support content for statistics questions.

    Neither *q* nor *nums* is read. Returns a new dict with the keys
    ``first_step``, ``hint_1``..``hint_3``, ``walkthrough_steps``,
    ``method_explanation`` and ``common_trap``, in that order.
    """
    field_order = (
        "first_step",
        "hint_1",
        "hint_2",
        "hint_3",
        "walkthrough_steps",
        "method_explanation",
        "common_trap",
    )
    field_values = (
        "Identify what measure is being asked (mean, median, etc.).",
        "Write out the numbers clearly.",
        "Apply the correct formula.",
        "Check your calculation.",
        ["List values.", "Apply formula (mean, median, etc.).", "Compute carefully."],
        ["Different measures describe data differently.", "Mean = sum / count."],
        "Using the wrong measure.",
    )
    # Pair each field name with its value, keeping the declared order.
    return dict(zip(field_order, field_values))
def geometry_template(q, nums):
    """Build the fixed support content for geometry questions.

    Neither *q* nor *nums* is read. Returns a new dict with the keys
    ``first_step``, ``hint_1``..``hint_3``, ``walkthrough_steps``,
    ``method_explanation`` and ``common_trap``, in that order.
    """
    guidance = {}
    guidance["first_step"] = "Identify the shape and formula needed."
    guidance["hint_1"] = "Recall the relevant formula."
    guidance["hint_2"] = "Substitute values carefully."
    guidance["hint_3"] = "Solve step by step."
    guidance["walkthrough_steps"] = [
        "Identify formula.",
        "Substitute values.",
        "Compute result.",
    ]
    guidance["method_explanation"] = [
        "Geometry relies on standard formulas.",
        "Careful substitution avoids mistakes.",
    ]
    guidance["common_trap"] = "Using the wrong formula."
    return guidance
def general_template(q, nums):
    """Build the fixed fallback support content for unclassified questions.

    Neither *q* nor *nums* is read. Returns a new dict with the keys
    ``first_step``, ``hint_1``..``hint_3``, ``walkthrough_steps``,
    ``method_explanation`` and ``common_trap``, in that order.
    """
    opening = "Break the question into known and unknown parts."
    prompts = [
        "What is being asked?",
        "What information is given?",
        "How can you link them mathematically?",
    ]
    plan = [
        "Understand the problem.",
        "Identify variables.",
        "Set up relationships.",
        "Solve step by step.",
    ]
    approach = ["Translate words into math.", "Solve systematically."]
    pitfall = "Misinterpreting the question."

    first_hint, second_hint, third_hint = prompts
    return {
        "first_step": opening,
        "hint_1": first_hint,
        "hint_2": second_hint,
        "hint_3": third_hint,
        "walkthrough_steps": plan,
        "method_explanation": approach,
        "common_trap": pitfall,
    }
+# ----------------------------
+# Router
+# ----------------------------
def generate_support(q):
    """Assemble the support-bank record for one question.

    The topic returned by ``detect_topic`` selects a template function;
    topics without a dedicated template use ``general_template``. The
    template's fields are merged after the question's own fields.

    Args:
        q: Mapping with ``"id"``, ``"questionText"``, ``"answers"`` and
            ``"correctIndex"`` entries.

    Returns:
        Dict with ``question_id``, ``topic``, ``stem``, ``choices`` and
        ``correct_option``, followed by the selected template's fields.

    Raises:
        KeyError: If *q* lacks any of the entries listed above.
    """
    stem = q["questionText"]
    nums = extract_numbers(stem)
    topic = detect_topic(q)

    # Topic name -> template builder; anything else uses the general one.
    builders = {
        "percent": percent_template,
        "algebra": algebra_template,
        "ratio": ratio_template,
        "probability": probability_template,
        "statistics": statistics_template,
        "geometry": geometry_template,
    }
    build = builders.get(topic, general_template)

    record = {
        "question_id": q["id"],
        "topic": topic,
        "stem": stem,
        "choices": q["answers"],
        "correct_option": q["correctIndex"],  # internal use
    }
    record.update(build(q, nums))
    return record
+# ----------------------------
+# Main
+# ----------------------------
def main():
    """Convert the question file into a JSON Lines support bank.

    Loads the JSON document at ``INPUT_PATH``, takes its ``"items"`` list,
    and writes one ``generate_support`` record per item to ``OUTPUT_PATH``
    as a single JSON object per line. Prints the output path when done.
    """
    with INPUT_PATH.open("r", encoding="utf-8") as source:
        questions = json.load(source)["items"]

    with OUTPUT_PATH.open("w", encoding="utf-8") as sink:
        sink.writelines(
            json.dumps(generate_support(item)) + "\n" for item in questions
        )

    print(f"Generated support bank → {OUTPUT_PATH}")


if __name__ == "__main__":
    main()