Spaces:

j-js
/

GameAI

Sleeping

App Files Files Community

GameAI / generate_question_support.py

j-js

Update generate_question_support.py

18361f1 verified 15 days ago

raw

history blame contribute delete

8.34 kB

	import json
	import re
	from pathlib import Path

	# Source question file; main() loads it as JSON and reads its "items" list.
	INPUT_PATH = Path("data/gmat_questions.json")
	# Destination file; main() writes one JSON-encoded support record per line.
	OUTPUT_PATH = Path("data/question_support_bank.jsonl")


	# ----------------------------
	# Utilities
	# ----------------------------
	def extract_numbers(text):
	    """Return every unsigned numeric literal found in *text*, in order.

	    Integers ("20") and decimals ("3.14") are returned as strings. A decimal
	    point is only kept when digits follow it, so a number that ends a
	    sentence ("the total is 5.") yields "5" rather than "5.".

	    Args:
	        text: The string to scan.

	    Returns:
	        A list of the matched number strings (empty if there are none).
	    """
	    return re.findall(r"\d+(?:\.\d+)?", text)


	# Ordered (topic, pattern) pairs: the first pattern that matches wins, so the
	# order here is the classification priority. Keywords are word-bounded so
	# that, for example, "operation" is not mistaken for "ratio", and a colon
	# only signals a ratio when it sits between digits ("3:4"), not after a
	# label such as "Solve:".
	_TOPIC_PATTERNS = (
	    ("percent", re.compile(r"%|percent")),
	    ("ratio", re.compile(r"\bratios?\b|\d\s*:\s*\d")),
	    ("probability", re.compile(r"\bprobabilit(?:y|ies)\b|\bchances?\b")),
	    (
	        "statistics",
	        re.compile(r"\b(?:means?|average[sd]?|medians?|data|variances?)\b"),
	    ),
	    (
	        "geometry",
	        re.compile(r"\b(?:areas?|circles?|triangles?|perimeters?)\b"),
	    ),
	    # A letter, one whitespace character, then an operator: e.g. "x + 3".
	    ("algebra", re.compile(r"[a-z]\s[+\-/=]")),
	)


	def detect_topic(q):
	    """Classify a question into a coarse topic using keyword heuristics.

	    The lower-cased ``q["questionText"]`` is tested against each pattern in
	    priority order: percent, ratio, probability, statistics, geometry,
	    algebra.

	    Args:
	        q: Mapping with a "questionText" string.

	    Returns:
	        The name of the first matching topic, or "general" if none match.

	    Raises:
	        KeyError: If ``q`` has no "questionText" entry.
	    """
	    text = q["questionText"].lower()

	    for topic, pattern in _TOPIC_PATTERNS:
	        if pattern.search(text):
	            return topic

	    return "general"


	# ----------------------------
	# Smart Templates
	# ----------------------------
	def percent_template(q, nums):
	    """Build the fixed hint/walkthrough bundle for percentage questions.

	    ``q`` and ``nums`` are accepted but not used; the returned content is
	    the same for every call.

	    Returns:
	        A dict with first_step, hint_1..hint_3, walkthrough_steps,
	        method_explanation and common_trap, in that key order.
	    """
	    steps = [
	        "Start with an easy base value (like 100).",
	        "Apply the first percentage change.",
	        "Apply the second change to the new value.",
	        "Compare the result with the original.",
	    ]
	    rationale = [
	        "Percent changes are multiplicative, not additive.",
	        "Each change affects the updated value.",
	        "Using 100 simplifies calculations.",
	    ]
	    return dict(
	        first_step="Treat the original value as 100 unless a specific number is easier.",
	        hint_1="Focus on how the percentage is applied — is it increase, decrease, or part of a whole?",
	        hint_2="Convert the percentage into a multiplier (e.g. +20% → ×1.2, -20% → ×0.8).",
	        hint_3="Apply each percentage step in order — don’t combine them directly.",
	        walkthrough_steps=steps,
	        method_explanation=rationale,
	        common_trap="Adding/subtracting percentages directly instead of applying sequential changes.",
	    )


	def algebra_template(q, nums):
	    """Build the hint/walkthrough bundle for algebra questions.

	    The question text is searched for an "=" with question-mark-free text on
	    both sides. When found, the two stripped sides are echoed back in the
	    first step and first walkthrough step; otherwise a generic bundle is
	    returned. ``nums`` is accepted but not used.

	    Args:
	        q: Mapping with a "questionText" string.
	        nums: Unused.

	    Returns:
	        A dict with first_step, hint_1..hint_3, walkthrough_steps,
	        method_explanation and common_trap, in that key order.
	    """
	    equation = re.search(r"([^\?]+)=([^\?]+)", q["questionText"])

	    if equation is None:
	        # No usable "=" in the text: fall back to equation-agnostic advice.
	        return dict(
	            first_step="Identify the variable and isolate it step by step.",
	            hint_1="Look at what operations are applied to the variable.",
	            hint_2="Undo operations in reverse order.",
	            hint_3="Keep both sides balanced while simplifying.",
	            walkthrough_steps=[
	                "Identify the variable.",
	                "Move constants to one side.",
	                "Undo multiplication/division.",
	                "Simplify to isolate the variable.",
	            ],
	            method_explanation=[
	                "Algebra problems require isolating the variable.",
	                "Reverse operations systematically.",
	            ],
	            common_trap="Forgetting to apply operations to both sides.",
	        )

	    lhs, rhs = (side.strip() for side in equation.groups())
	    stated = f"{lhs} = {rhs}"

	    return dict(
	        first_step=f"Start with the equation: {stated}",
	        hint_1="Focus on isolating the variable.",
	        hint_2="Undo addition/subtraction first.",
	        hint_3="Then undo multiplication/division.",
	        walkthrough_steps=[
	            f"Start with: {stated}",
	            "Move constants to one side.",
	            "Undo multiplication/division.",
	            "Solve for the variable.",
	        ],
	        method_explanation=[
	            "Solve by isolating the variable.",
	            "Reverse operations step by step.",
	            "Keep both sides balanced.",
	        ],
	        common_trap="Forgetting to reverse operations in the correct order.",
	    )

	def ratio_template(q, nums):
	    """Build the fixed hint/walkthrough bundle for ratio questions.

	    ``q`` and ``nums`` are accepted but not used; the returned content is
	    the same for every call.

	    Returns:
	        A dict with first_step, hint_1..hint_3, walkthrough_steps,
	        method_explanation and common_trap, in that key order.
	    """
	    steps = [
	        "Write ratio as parts.",
	        "Sum the parts.",
	        "Divide total by parts.",
	        "Multiply by needed portion.",
	    ]
	    rationale = [
	        "Ratios represent proportional relationships.",
	        "Breaking into equal units simplifies reasoning.",
	    ]
	    return dict(
	        first_step="Break the ratio into total parts.",
	        hint_1="Add the ratio parts together.",
	        hint_2="Find the value of one part.",
	        hint_3="Scale up to get the required quantity.",
	        walkthrough_steps=steps,
	        method_explanation=rationale,
	        common_trap="Using ratio numbers directly instead of total parts.",
	    )


	def probability_template(q, nums):
	    """Build the fixed hint/walkthrough bundle for probability questions.

	    ``q`` and ``nums`` are accepted but not used; the returned content is
	    the same for every call.

	    Returns:
	        A dict with first_step, hint_1..hint_3, walkthrough_steps,
	        method_explanation and common_trap, in that key order.
	    """
	    steps = [
	        "Count total outcomes.",
	        "Count favorable outcomes.",
	        "Divide favorable by total.",
	    ]
	    rationale = [
	        "Probability is a ratio.",
	        "Clear counting is essential.",
	    ]
	    return dict(
	        first_step="Count total outcomes and favorable outcomes.",
	        hint_1="How many total possibilities are there?",
	        hint_2="How many meet the condition?",
	        hint_3="Probability = favorable / total.",
	        walkthrough_steps=steps,
	        method_explanation=rationale,
	        common_trap="Incorrect counting of outcomes.",
	    )


	def statistics_template(q, nums):
	    """Build the fixed hint/walkthrough bundle for statistics questions.

	    ``q`` and ``nums`` are accepted but not used; the returned content is
	    the same for every call.

	    Returns:
	        A dict with first_step, hint_1..hint_3, walkthrough_steps,
	        method_explanation and common_trap, in that key order.
	    """
	    steps = [
	        "List values.",
	        "Apply formula (mean, median, etc.).",
	        "Compute carefully.",
	    ]
	    rationale = [
	        "Different measures describe data differently.",
	        "Mean = sum / count.",
	    ]
	    return dict(
	        first_step="Identify what measure is being asked (mean, median, etc.).",
	        hint_1="Write out the numbers clearly.",
	        hint_2="Apply the correct formula.",
	        hint_3="Check your calculation.",
	        walkthrough_steps=steps,
	        method_explanation=rationale,
	        common_trap="Using the wrong measure.",
	    )


	def geometry_template(q, nums):
	    """Build the fixed hint/walkthrough bundle for geometry questions.

	    ``q`` and ``nums`` are accepted but not used; the returned content is
	    the same for every call.

	    Returns:
	        A dict with first_step, hint_1..hint_3, walkthrough_steps,
	        method_explanation and common_trap, in that key order.
	    """
	    steps = [
	        "Identify formula.",
	        "Substitute values.",
	        "Compute result.",
	    ]
	    rationale = [
	        "Geometry relies on standard formulas.",
	        "Careful substitution avoids mistakes.",
	    ]
	    return dict(
	        first_step="Identify the shape and formula needed.",
	        hint_1="Recall the relevant formula.",
	        hint_2="Substitute values carefully.",
	        hint_3="Solve step by step.",
	        walkthrough_steps=steps,
	        method_explanation=rationale,
	        common_trap="Using the wrong formula.",
	    )


	def general_template(q, nums):
	    """Build the fixed catch-all hint/walkthrough bundle.

	    ``q`` and ``nums`` are accepted but not used; the returned content is
	    the same for every call.

	    Returns:
	        A dict with first_step, hint_1..hint_3, walkthrough_steps,
	        method_explanation and common_trap, in that key order.
	    """
	    steps = [
	        "Understand the problem.",
	        "Identify variables.",
	        "Set up relationships.",
	        "Solve step by step.",
	    ]
	    rationale = [
	        "Translate words into math.",
	        "Solve systematically.",
	    ]
	    return dict(
	        first_step="Break the question into known and unknown parts.",
	        hint_1="What is being asked?",
	        hint_2="What information is given?",
	        hint_3="How can you link them mathematically?",
	        walkthrough_steps=steps,
	        method_explanation=rationale,
	        common_trap="Misinterpreting the question.",
	    )


	# ----------------------------
	# Router
	# ----------------------------
	def generate_support(q):
	    """Assemble the full support record for one question.

	    Extracts the numbers from the question text, detects its topic, calls
	    the matching template builder (the general one for any other topic) and
	    merges the template into the question's identifying fields.

	    Args:
	        q: Mapping with "id", "questionText", "answers" and "correctIndex".

	    Returns:
	        A dict with question_id, topic, stem, choices and correct_option,
	        followed by every key of the chosen template.
	    """
	    nums = extract_numbers(q["questionText"])
	    topic = detect_topic(q)

	    builders = {
	        "percent": percent_template,
	        "algebra": algebra_template,
	        "ratio": ratio_template,
	        "probability": probability_template,
	        "statistics": statistics_template,
	        "geometry": geometry_template,
	    }
	    template = builders.get(topic, general_template)(q, nums)

	    record = {
	        "question_id": q["id"],
	        "topic": topic,
	        "stem": q["questionText"],
	        "choices": q["answers"],
	        "correct_option": q["correctIndex"],  # internal use
	    }
	    record.update(template)
	    return record


	# ----------------------------
	# Main
	# ----------------------------
	def main():
	    """Generate the support bank file from the question file.

	    Loads the JSON document at INPUT_PATH, builds a support record for each
	    entry of its "items" list, and writes the records to OUTPUT_PATH as one
	    JSON object per line. Prints a confirmation message when done.
	    """
	    with INPUT_PATH.open("r", encoding="utf-8") as src:
	        payload = json.load(src)

	    questions = payload["items"]

	    with OUTPUT_PATH.open("w", encoding="utf-8") as sink:
	        for question in questions:
	            record = generate_support(question)
	            sink.write(f"{json.dumps(record)}\n")

	    print(f"Generated support bank → {OUTPUT_PATH}")


	# Run the generator only when this file is executed as a script.
	if __name__ == "__main__":
	    main()