atz21 committed on
Commit
9a76d57
·
verified ·
1 Parent(s): b074117

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -294
app.py CHANGED
@@ -1,14 +1,10 @@
1
- import os
 
2
  import json
3
- import random
4
- import google.generativeai as genai
5
- from typing import List, Dict, Any
6
 
7
- # Configure Gemini API
8
- genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
9
-
10
- # IB Math topics list
11
- IB_TOPICS = [
12
  "SL 1.1 - Operations with numbers in the form a × 10k where 1 < a < 10 and k is an integer.",
13
  "SL 1.2 - Arithmetic sequences and series. Use of the formulae for the nth term and the sum of the first n terms of the sequence. Use of sigma notation for sums of arithmetic sequences. Applications. Analysis, interpretation and prediction where a model is not perfectly arithmetic in real life.",
14
  "SL 1.3 - Geometric sequences and series. Use of the formulae for the n th term and the sum of the first n terms of the sequence. Use of sigma notation for the sums of geometric sequences. Applications.",
@@ -92,299 +88,73 @@ IB_TOPICS = [
92
  "AHL 5.17 - Area of the region enclosed by a curve and the y-axis in a given interval. Volumes of revolution about the x-axis or y-axis.",
93
  "AHL 5.18 - First order differential equations. Numerical solution of dy/dx = f(x, y) using Euler's method. Variables separable. Homogeneous differential equation. Solution of y' + P(x)y = Q(x), using the integrating factor.",
94
  "AHL 5.19 - Maclaurin series to obtain expansions for eˣ, sinx, cosx, ln(1+x), (1+x)ᵖ, p∈Q. Use of simple substitution, products, integration and differentiation to obtain other series. Maclaurin series developed from differential equations"
95
- ]
96
 
97
def create_model():
    """Return a Gemini model, preferring the newer one.

    "gemini-2.0-flash-exp" is attempted first; if constructing it raises,
    "gemini-1.5-flash" is attempted instead. Both use temperature 0.

    Raises:
        Exception: When the fallback model cannot be constructed either; the
            message carries the fallback model's error.
    """
    try:
        return genai.GenerativeModel(
            "gemini-2.0-flash-exp",
            generation_config={"temperature": 0},
        )
    except Exception:
        # The preferred model could not be built; try the older one.
        try:
            return genai.GenerativeModel(
                "gemini-1.5-flash",
                generation_config={"temperature": 0},
            )
        except Exception as fallback_error:
            raise Exception(f"Failed to create Gemini model: {fallback_error}")
108
 
109
def identify_topics_with_gemini(qp_content: str, graded_as: str) -> Dict[str, Any]:
    """
    Ask Gemini to map each question of a paper to an IB Math topic.

    Args:
        qp_content: Text of the question paper.
        graded_as: Text of the grading results for that paper.

    Returns:
        The JSON object parsed from the model's reply. If querying or parsing
        fails, the error is printed and a dict with empty "topic_analysis",
        "incorrect_topics" and "correct_topics" lists is returned instead.
    """
    model = create_model()

    topics_list = "\n".join(f"- {entry}" for entry in IB_TOPICS)

    prompt = f"""You are an IB Mathematics expert. Analyze the following question paper content and grading results to identify the specific IB Math topic for each question.

QUESTION PAPER CONTENT:
{qp_content}

GRADING RESULTS:
{graded_as}

IB MATH TOPICS LIST:
{topics_list}

For each question in the question paper, identify which specific topic from the list above it belongs to. Return your analysis in JSON format:

{{
"topic_analysis": [
{{
"question_id": "1",
"topic": "SL 2.6",
"confidence": "high",
"reasoning": "Question involves quadratic functions and finding vertex form"
}},
{{
"question_id": "2",
"topic": "SL 4.7",
"confidence": "medium",
"reasoning": "Question deals with discrete probability distributions"
}}
],
"incorrect_topics": ["SL 2.6", "SL 4.7"],
"correct_topics": ["SL 1.2", "SL 3.4"]
}}

Focus on:
1. Identifying the exact topic code from the provided list
2. Determining which questions were answered incorrectly vs correctly based on the grading
3. Providing clear reasoning for topic identification
"""

    try:
        import re

        reply = model.generate_content(prompt).text

        # Greedy match with DOTALL: spans from the first "{" to the last "}"
        # so any text the model wraps around the JSON is discarded.
        found = re.search(r'\{.*\}', reply, re.DOTALL)
        if found is None:
            raise ValueError("No JSON found in response")
        return json.loads(found.group())

    except Exception as e:
        print(f"Error in topic identification: {e}")
        return {"topic_analysis": [], "incorrect_topics": [], "correct_topics": []}
170
-
171
def load_questions_database(file_path: str = "merged_gemini_output.txt") -> List[Dict]:
    """
    Load every JSON object found in the questions database file.

    The file is scanned for "{" characters and a full JSON object is decoded
    from each one. Unlike a brace-matching regular expression, this keeps
    records whose values contain braces themselves (nested objects, or LaTeX
    such as ``\\frac{1}{2}`` inside a string). Text between objects is ignored.

    Args:
        file_path: Path of the database file.

    Returns:
        The decoded objects in file order. An empty list is returned, after
        printing a message, when the file is missing or cannot be read.
    """
    questions: List[Dict] = []

    if not os.path.exists(file_path):
        print(f"Warning: Questions database file '{file_path}' not found.")
        return questions

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        print(f"Error loading questions database: {e}")
        return questions

    decoder = json.JSONDecoder()
    position = content.find("{")
    while position != -1:
        try:
            record, end = decoder.raw_decode(content, position)
        except json.JSONDecodeError:
            # Not the start of a valid object; resume at the next brace.
            position = content.find("{", position + 1)
            continue
        questions.append(record)
        # Skip past the decoded object so nested objects are not re-read.
        position = content.find("{", end)

    return questions
202
 
203
def generate_smart_test(topic_analysis: Dict[str, Any], questions_db: List[Dict]) -> Dict[str, Any]:
    """
    Generate a smart test based on the topic analysis and available questions.

    Recommended composition (8-question example):
    1. 4 remediation items - each tied to specific concepts the student got wrong
       - 2 items that are near-transfer (very similar to original wrong items, scaffolded)
       - 2 items that are far-transfer/applied (same concept but in new context)
    2. 2 retention items - on topics the student got right (check for forgetting)
    3. 1 synthesis/higher-order item - combine multiple concepts
    4. 1 quick confidence/metacognition item

    Fewer questions are returned when the database cannot supply enough
    items for a category; the "composition" counts reflect what was
    actually selected.

    Args:
        topic_analysis: Dict that may carry "incorrect_topics" and
            "correct_topics" lists; missing keys are treated as empty.
        questions_db: Question dicts; each is grouped by its "topic" value.

    Returns:
        A dict with "test_questions", "composition" and "focus_topics".
    """
    incorrect_topics = topic_analysis.get("incorrect_topics", [])
    correct_topics = topic_analysis.get("correct_topics", [])

    # Group questions by topic
    questions_by_topic: Dict[str, List[Dict]] = {}
    for question in questions_db:
        questions_by_topic.setdefault(question.get("topic", ""), []).append(question)

    # 1. Remediation items (up to 4 questions from incorrect topics)
    remediation_questions: List[Dict] = []
    for topic in incorrect_topics[:2]:  # Focus on first 2 incorrect topics
        available = questions_by_topic.get(topic, [])
        if len(available) >= 2:
            # 1 near-transfer, 1 far-transfer per topic
            remediation_questions.extend(random.sample(available, 2))

    # Top up towards 4 remediation questions. Stop as soon as a full pass
    # over the topics adds nothing, otherwise a small database would keep
    # this loop spinning forever.
    added_any = True
    while len(remediation_questions) < 4 and added_any:
        added_any = False
        for topic in incorrect_topics:
            if len(remediation_questions) >= 4:
                break
            available = [
                q for q in questions_by_topic.get(topic, [])
                if q not in remediation_questions
            ]
            if available:
                remediation_questions.append(random.choice(available))
                added_any = True

    test_questions = list(remediation_questions[:4])

    # 2. Retention items (up to 2 questions from correct topics)
    retention_questions: List[Dict] = []
    for topic in correct_topics[:2]:
        available = [
            q for q in questions_by_topic.get(topic, [])
            if q not in test_questions
        ]
        if available:
            retention_questions.append(random.choice(available))

    test_questions.extend(retention_questions[:2])

    # 3. Synthesis question (1 question combining concepts)
    # For now, pick any unused question from the topics in play.
    # dict.fromkeys de-duplicates while keeping a stable topic order.
    synthesis_candidates: List[Dict] = []
    for topic in dict.fromkeys(incorrect_topics + correct_topics):
        synthesis_candidates.extend(
            q for q in questions_by_topic.get(topic, [])
            if q not in test_questions
        )

    synthesis_questions: List[Dict] = []
    if synthesis_candidates:
        synthesis_questions.append(random.choice(synthesis_candidates))
    test_questions.extend(synthesis_questions)

    # 4. Metacognition item (create a simple confidence question)
    metacognition_question = {
        "question_number": "META",
        "topic": "Metacognition",
        "content": "Rate your confidence level (1-5) for each of the questions above and briefly explain your reasoning for one question where you felt least confident.",
        "year": "N/A",
        "month": "N/A"
    }
    test_questions.append(metacognition_question)

    return {
        "test_questions": test_questions,
        "composition": {
            "remediation_items": len(remediation_questions),
            "retention_items": len(retention_questions),
            "synthesis_items": len(synthesis_questions),
            "metacognition_items": 1,
            "total_questions": len(test_questions)
        },
        "focus_topics": {
            "incorrect_topics": incorrect_topics,
            "correct_topics": correct_topics
        }
    }
295
-
296
def format_test_output(smart_test: Dict[str, Any]) -> str:
    """Render a generated test as Markdown-style text.

    Args:
        smart_test: Dict with "composition", "focus_topics" and
            "test_questions" entries.

    Returns:
        A newline-joined report: composition summary, focus areas, then each
        question followed by a divider.
    """
    comp = smart_test["composition"]
    focus = smart_test["focus_topics"]

    # Header and composition summary
    lines = [
        "# SMART ADAPTIVE TEST",
        "=" * 50,
        "",
        "## Test Composition:",
        f"- Remediation items: {comp['remediation_items']}",
        f"- Retention items: {comp['retention_items']}",
        f"- Synthesis items: {comp['synthesis_items']}",
        f"- Metacognition items: {comp['metacognition_items']}",
        f"- Total questions: {comp['total_questions']}",
        "",
        "## Focus Areas:",
        "### Topics to remediate:",
    ]

    # Focus topics
    lines.extend(f"- {name}" for name in focus["incorrect_topics"])
    lines += ["", "### Topics to retain:"]
    lines.extend(f"- {name}" for name in focus["correct_topics"])
    lines += ["", "## Test Questions:", ""]

    # Questions
    for number, item in enumerate(smart_test["test_questions"], 1):
        lines.append(f"### Question {number}")
        lines.append(f"**Topic:** {item.get('topic', 'N/A')}")
        # Items whose year is the 'N/A' placeholder get no source line.
        if item.get('year') != 'N/A':
            lines.append(f"**Source:** {item.get('month', '')} {item.get('year', '')}")
        lines += ["", item.get('content', ''), "", "-" * 30, ""]

    return "\n".join(lines)
341
 
342
def smart_test_pipeline(qp_content: str, graded_as: str) -> str:
    """
    Run the full flow: topic identification, database load, test
    generation and formatting. Progress is reported via print().

    Args:
        qp_content: Text of the question paper.
        graded_as: Text of the grading results.

    Returns:
        The formatted test, or an error message string when topic analysis
        produced nothing or the questions database is empty.
    """
    print("🔍 Starting smart test generation...")

    # Step 1: Identify topics using Gemini
    print("📊 Analyzing topics with Gemini...")
    analysis = identify_topics_with_gemini(qp_content, graded_as)
    if not analysis.get("topic_analysis"):
        return " Error: Could not analyze topics from the provided content."

    wrong_count = len(analysis.get('incorrect_topics', []))
    right_count = len(analysis.get('correct_topics', []))
    print(f"✅ Identified {wrong_count} incorrect topics and {right_count} correct topics")

    # Step 2: Load questions database
    print("📚 Loading questions database...")
    database = load_questions_database()
    if not database:
        return "❌ Error: No questions database found. Please ensure 'merged_gemini_output.txt' exists."

    print(f"✅ Loaded {len(database)} questions from database")

    # Step 3: Generate smart test
    print("🎯 Generating adaptive test...")
    generated = generate_smart_test(analysis, database)

    # Step 4: Format output
    report = format_test_output(generated)

    print("✅ Smart test generated successfully!")
    return report
376
 
377
if __name__ == "__main__":
    # Example usage: run the pipeline on a two-question sample paper and
    # print the resulting test (or the error message the pipeline returns).
    sample_qp = """
Question 1: Solve the quadratic equation x² + 5x + 6 = 0
Question 2: A discrete random variable X has the probability distribution P(X = x) = cx(5-x) for x = 1,2,3,4. Find the value of c.
"""

    # Grading notes matching the sample paper: one wrong, one right answer.
    sample_graded = """
Question 1: Incorrect - Student got x = -2, -4 instead of x = -2, -3
Question 2: Correct - Student correctly found c = 1/30
"""

    result = smart_test_pipeline(sample_qp, sample_graded)
    print(result)
 
1
+ import gradio as gr
2
+ import PyPDF2
3
  import json
4
+ from some_llm_api import call_gemini_llm # replace with actual API call
 
 
5
 
6
+ # Predefined topic list (abbreviated for brevity; include all topics in practice)
7
+ TOPICS = [
 
 
 
8
  "SL 1.1 - Operations with numbers in the form a × 10k where 1 < a < 10 and k is an integer.",
9
  "SL 1.2 - Arithmetic sequences and series. Use of the formulae for the nth term and the sum of the first n terms of the sequence. Use of sigma notation for sums of arithmetic sequences. Applications. Analysis, interpretation and prediction where a model is not perfectly arithmetic in real life.",
10
  "SL 1.3 - Geometric sequences and series. Use of the formulae for the n th term and the sum of the first n terms of the sequence. Use of sigma notation for the sums of geometric sequences. Applications.",
 
88
  "AHL 5.17 - Area of the region enclosed by a curve and the y-axis in a given interval. Volumes of revolution about the x-axis or y-axis.",
89
  "AHL 5.18 - First order differential equations. Numerical solution of dy/dx = f(x, y) using Euler's method. Variables separable. Homogeneous differential equation. Solution of y' + P(x)y = Q(x), using the integrating factor.",
90
  "AHL 5.19 - Maclaurin series to obtain expansions for eˣ, sinx, cosx, ln(1+x), (1+x)ᵖ, p∈Q. Use of simple substitution, products, integration and differentiation to obtain other series. Maclaurin series developed from differential equations"
 
91
 
92
+ ]
 
 
 
 
 
 
 
 
 
 
93
 
94
def extract_pdf_text(pdf_file):
    """Return the text of every page of a PDF, each page followed by "\\n".

    Args:
        pdf_file: Passed straight to ``PyPDF2.PdfReader``.
            NOTE(review): presumably a path or file object from the Gradio
            upload widget; confirm against the caller.

    Returns:
        The concatenated page texts; an empty string for a PDF with no pages.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # A falsy extraction result (e.g. a page with no text layer) is treated
    # as empty so the concatenation can never fail on a non-string value.
    return "".join(f"{page.extract_text() or ''}\n" for page in reader.pages)
100
+
101
def process_qp_and_graded(qp_file, graded_file):
    """Identify question topics and generate a follow-up test from two PDFs.

    Args:
        qp_file: The question paper PDF, handed to ``extract_pdf_text``.
        graded_file: The graded answers PDF, handed to ``extract_pdf_text``.

    Returns:
        A pair of JSON-formatted strings: the LLM's topic identification
        result and the LLM's generated test, each serialised with
        ``json.dumps(..., indent=2)``.
    """
    # Step 1: Extract text
    qp_text = extract_pdf_text(qp_file)
    graded_text = extract_pdf_text(graded_file)

    # Step 2: Call Gemini LLM to identify topics for each question
    topic_catalogue = ', '.join(TOPICS)
    llm_prompt = f"""
You are a math expert. Identify the topic for each question in the following question paper
from this list: {topic_catalogue}.
Return JSON in the following format:
[
{{
"question_number": 1,
"topic": "SL 1.1",
"content": "The question text"
}},
...
]
Question paper text:
{qp_text}
"""
    identified_questions = call_gemini_llm(llm_prompt)

    # Step 3: Generate new 8-question test based on student's graded answers
    # The identified questions are embedded in the prompt as-is, together
    # with the recommended composition.
    llm_test_prompt = f"""
You are a math teacher. Based on the graded answers below:
{graded_text}

And the following questions with topics:
{identified_questions}

Create a new 8-question test with the following composition:
- 4 remediation items based on wrong answers (2 near-transfer, 2 far-transfer)
- 2 retention items on topics the student got right
- 1 synthesis / higher-order item combining multiple concepts
- 1 confidence/metacognition item

Return JSON with question_number, topic, and content.
"""
    new_test = call_gemini_llm(llm_test_prompt)

    topics_json = json.dumps(identified_questions, indent=2)
    test_json = json.dumps(new_test, indent=2)
    return topics_json, test_json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
# Gradio interface: two PDF uploads in, two text boxes out. The uploads are
# passed positionally to process_qp_and_graded and its two return values
# fill the text boxes in the same order.
iface = gr.Interface(
    fn=process_qp_and_graded,
    inputs=[
        gr.File(label="Question Paper PDF"),
        gr.File(label="Graded Answers PDF")
    ],
    outputs=[
        gr.Textbox(label="Questions with Topics", lines=20),
        gr.Textbox(label="Generated 8-Question Test", lines=20)
    ],
    title="Math Question Topic Identifier & Test Generator",
    description="Upload the student's question paper and graded answers PDFs. This app identifies question topics and generates a new targeted test."
)

# Start the app as soon as the module is executed.
iface.launch()