j-js committed on
Commit
c5d6983
·
verified ·
1 Parent(s): df209d2

Create generate_question_support.py

Browse files
Files changed (1) hide show
  1. generate_question_support.py +232 -0
generate_question_support.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from pathlib import Path
4
+
5
# Both files live in the same relative "data" directory, so the paths
# resolve against the current working directory of the process.
_DATA_DIR = Path("data")

# JSON document holding the questions to process.
INPUT_PATH = _DATA_DIR / "gmat_questions.json"
# JSON Lines file that receives one support record per question.
OUTPUT_PATH = _DATA_DIR / "question_support_bank.jsonl"
7
+
8
+
9
+ # ----------------------------
10
+ # Utilities
11
+ # ----------------------------
12
def extract_numbers(text):
    """Return every unsigned integer or decimal literal in *text*, as strings.

    A period is only treated as part of a number when at least one digit
    follows it, so a sentence-ending number such as "20." is returned as
    "20" rather than "20.". Signs and thousands separators are not
    interpreted.

    Args:
        text: The string to scan.

    Returns:
        A list of the matched substrings in order of appearance; empty when
        the text contains no digits.
    """
    return re.findall(r"\d+(?:\.\d+)?", text)
14
+
15
+
16
# "ratio" must be a whole word (so "rational" / "operation" do not count), and a
# colon only signals a ratio when it sits between two digits (e.g. "3:2").
_RATIO_RE = re.compile(r"\bratios?\b|\d\s*:\s*\d")

# Whole-word match so that "meaning" / "means" are not mistaken for statistics.
_MEAN_RE = re.compile(r"\bmean\b")

# A letter followed by +, * or = is taken as algebra. For - and / the letter must
# not be preceded by another letter and the operator must not be followed by two
# letters, which rules out hyphenated or slashed words ("two-digit", "and/or").
_ALGEBRA_RE = re.compile(
    r"[a-z]\s*[+*=]"
    r"|(?<![a-z])[a-z]\s*[-/](?![a-z]{2})"
)


def detect_topic(q):
    """Classify a question into a coarse topic using keyword heuristics.

    The checks run in a fixed priority order and the first hit wins:
    percent, ratio, probability, statistics, geometry, algebra. Matching is
    done on the lower-cased question text.

    Args:
        q: Mapping with a "questionText" string entry.

    Returns:
        One of "percent", "ratio", "probability", "statistics", "geometry",
        "algebra" or "general" (the fallback when nothing matches).

    Raises:
        KeyError: If "questionText" is missing from *q*.
    """
    text = q["questionText"].lower()

    if "%" in text or "percent" in text:
        return "percent"

    if _RATIO_RE.search(text):
        return "ratio"

    if "probability" in text or "chance" in text:
        return "probability"

    if _MEAN_RE.search(text) or any(
        word in text for word in ("average", "median", "data", "variance")
    ):
        return "statistics"

    if any(word in text for word in ("area", "circle", "triangle", "perimeter")):
        return "geometry"

    if _ALGEBRA_RE.search(text):
        return "algebra"

    return "general"
38
+
39
+
40
+ # ----------------------------
41
+ # Smart Templates
42
+ # ----------------------------
43
def percent_template(q, nums):
    """Build the fixed support payload for percent questions.

    Args:
        q: The question record; not read by this template.
        nums: Numbers extracted from the question; not read by this template.

    Returns:
        A new dict with the keys first_step, hint_1, hint_2, hint_3,
        walkthrough_steps, method_explanation and common_trap, in that order.
    """
    # Fresh lists are created on every call so callers never share state.
    walkthrough = [
        "Start with an easy base value (like 100).",
        "Apply the first percentage change.",
        "Apply the second change to the new value.",
        "Compare the result with the original.",
    ]
    rationale = [
        "Percent changes are multiplicative, not additive.",
        "Each change affects the updated value.",
        "Using 100 simplifies calculations.",
    ]
    return dict(
        first_step="Treat the original value as 100 unless a specific number is easier.",
        hint_1="Focus on how the percentage is applied — is it increase, decrease, or part of a whole?",
        hint_2="Convert the percentage into a multiplier (e.g. +20% → ×1.2, -20% → ×0.8).",
        hint_3="Apply each percentage step in order — don’t combine them directly.",
        walkthrough_steps=walkthrough,
        method_explanation=rationale,
        common_trap="Adding/subtracting percentages directly instead of applying sequential changes.",
    )
62
+
63
+
64
def algebra_template(q, nums):
    """Build the support payload for algebra questions.

    The first hint quotes the opening of the question: the first 50
    characters of q["questionText"], always followed by "...".

    Args:
        q: The question record; must contain "questionText".
        nums: Numbers extracted from the question; not read by this template.

    Returns:
        A new dict with the keys first_step, hint_1, hint_2, hint_3,
        walkthrough_steps, method_explanation and common_trap, in that order.
    """
    preview = q["questionText"][:50]
    walkthrough = [
        "Identify the equation.",
        "Move constants to one side.",
        "Undo multiplication/division.",
        "Solve for the variable.",
    ]
    rationale = [
        "Solve by isolating the variable step by step.",
        "Reverse operations carefully.",
        "Check your result by substitution.",
    ]
    return dict(
        first_step="Write the equation clearly and identify the variable.",
        hint_1=f"Look at the structure: {preview}...",
        hint_2="Undo operations in reverse order.",
        hint_3="Keep both sides balanced while isolating the variable.",
        walkthrough_steps=walkthrough,
        method_explanation=rationale,
        common_trap="Forgetting to apply operations to both sides.",
    )
83
+
84
+
85
def ratio_template(q, nums):
    """Build the fixed support payload for ratio questions.

    Args:
        q: The question record; not read by this template.
        nums: Numbers extracted from the question; not read by this template.

    Returns:
        A new dict with the keys first_step, hint_1, hint_2, hint_3,
        walkthrough_steps, method_explanation and common_trap, in that order.
    """
    walkthrough = [
        "Write ratio as parts.",
        "Sum the parts.",
        "Divide total by parts.",
        "Multiply by needed portion.",
    ]
    rationale = [
        "Ratios represent proportional relationships.",
        "Breaking into equal units simplifies reasoning.",
    ]
    return dict(
        first_step="Break the ratio into total parts.",
        hint_1="Add the ratio parts together.",
        hint_2="Find the value of one part.",
        hint_3="Scale up to get the required quantity.",
        walkthrough_steps=walkthrough,
        method_explanation=rationale,
        common_trap="Using ratio numbers directly instead of total parts.",
    )
103
+
104
+
105
def probability_template(q, nums):
    """Build the fixed support payload for probability questions.

    Args:
        q: The question record; not read by this template.
        nums: Numbers extracted from the question; not read by this template.

    Returns:
        A new dict with the keys first_step, hint_1, hint_2, hint_3,
        walkthrough_steps, method_explanation and common_trap, in that order.
    """
    walkthrough = [
        "Count total outcomes.",
        "Count favorable outcomes.",
        "Divide favorable by total.",
    ]
    rationale = [
        "Probability is a ratio.",
        "Clear counting is essential.",
    ]
    return dict(
        first_step="Count total outcomes and favorable outcomes.",
        hint_1="How many total possibilities are there?",
        hint_2="How many meet the condition?",
        hint_3="Probability = favorable / total.",
        walkthrough_steps=walkthrough,
        method_explanation=rationale,
        common_trap="Incorrect counting of outcomes.",
    )
122
+
123
+
124
def statistics_template(q, nums):
    """Build the fixed support payload for statistics questions.

    Args:
        q: The question record; not read by this template.
        nums: Numbers extracted from the question; not read by this template.

    Returns:
        A new dict with the keys first_step, hint_1, hint_2, hint_3,
        walkthrough_steps, method_explanation and common_trap, in that order.
    """
    walkthrough = [
        "List values.",
        "Apply formula (mean, median, etc.).",
        "Compute carefully.",
    ]
    rationale = [
        "Different measures describe data differently.",
        "Mean = sum / count.",
    ]
    return dict(
        first_step="Identify what measure is being asked (mean, median, etc.).",
        hint_1="Write out the numbers clearly.",
        hint_2="Apply the correct formula.",
        hint_3="Check your calculation.",
        walkthrough_steps=walkthrough,
        method_explanation=rationale,
        common_trap="Using the wrong measure.",
    )
141
+
142
+
143
def geometry_template(q, nums):
    """Build the fixed support payload for geometry questions.

    Args:
        q: The question record; not read by this template.
        nums: Numbers extracted from the question; not read by this template.

    Returns:
        A new dict with the keys first_step, hint_1, hint_2, hint_3,
        walkthrough_steps, method_explanation and common_trap, in that order.
    """
    walkthrough = [
        "Identify formula.",
        "Substitute values.",
        "Compute result.",
    ]
    rationale = [
        "Geometry relies on standard formulas.",
        "Careful substitution avoids mistakes.",
    ]
    return dict(
        first_step="Identify the shape and formula needed.",
        hint_1="Recall the relevant formula.",
        hint_2="Substitute values carefully.",
        hint_3="Solve step by step.",
        walkthrough_steps=walkthrough,
        method_explanation=rationale,
        common_trap="Using the wrong formula.",
    )
160
+
161
+
162
def general_template(q, nums):
    """Build the fixed fallback support payload for unclassified questions.

    Args:
        q: The question record; not read by this template.
        nums: Numbers extracted from the question; not read by this template.

    Returns:
        A new dict with the keys first_step, hint_1, hint_2, hint_3,
        walkthrough_steps, method_explanation and common_trap, in that order.
    """
    walkthrough = [
        "Understand the problem.",
        "Identify variables.",
        "Set up relationships.",
        "Solve step by step.",
    ]
    rationale = [
        "Translate words into math.",
        "Solve systematically.",
    ]
    return dict(
        first_step="Break the question into known and unknown parts.",
        hint_1="What is being asked?",
        hint_2="What information is given?",
        hint_3="How can you link them mathematically?",
        walkthrough_steps=walkthrough,
        method_explanation=rationale,
        common_trap="Misinterpreting the question.",
    )
180
+
181
+
182
+ # ----------------------------
183
+ # Router
184
+ # ----------------------------
185
def generate_support(q):
    """Assemble the full support record for one question.

    The question's topic is detected, the matching template is built, and
    the template fields are merged after the identifying fields.

    Args:
        q: Question mapping providing "id", "questionText", "answers" and
            "correctIndex".

    Returns:
        A dict with question_id, topic, stem, choices and correct_option,
        followed by every key of the selected template.

    Raises:
        KeyError: If any of the required entries is missing from *q*.
    """
    nums = extract_numbers(q["questionText"])
    topic = detect_topic(q)

    # Topic name -> template builder; unknown topics use the general template.
    builders = {
        "percent": percent_template,
        "algebra": algebra_template,
        "ratio": ratio_template,
        "probability": probability_template,
        "statistics": statistics_template,
        "geometry": geometry_template,
    }
    build = builders.get(topic, general_template)
    template = build(q, nums)

    record = {
        "question_id": q["id"],
        "topic": topic,
        "stem": q["questionText"],
        "choices": q["answers"],
        "correct_option": q["correctIndex"],  # internal use
    }
    # Template keys are merged last, as in a trailing ** unpacking.
    record.update(template)
    return record
212
+
213
+
214
+ # ----------------------------
215
+ # Main
216
+ # ----------------------------
217
def main(input_path=None, output_path=None):
    """Generate the question support bank as a JSON Lines file.

    Reads a JSON document, takes the list stored under its "items" key, and
    writes one JSON-encoded support record per question, one per line.

    Args:
        input_path: Optional path of the questions JSON file. Defaults to
            the module-level INPUT_PATH.
        output_path: Optional path of the JSON Lines file to write. Defaults
            to the module-level OUTPUT_PATH.

    Raises:
        KeyError: If the input document has no "items" entry.
    """
    source = INPUT_PATH if input_path is None else Path(input_path)
    target = OUTPUT_PATH if output_path is None else Path(output_path)

    with open(source, "r", encoding="utf-8") as f:
        data = json.load(f)

    questions = data["items"]

    # Make sure the destination directory exists before opening the file.
    target.parent.mkdir(parents=True, exist_ok=True)

    with open(target, "w", encoding="utf-8") as out:
        out.writelines(
            json.dumps(generate_support(q)) + "\n" for q in questions
        )

    print(f"Generated support bank → {target}")


if __name__ == "__main__":
    main()