Spaces:

atz21
/

smart_test

Sleeping

App Files Files Community

atz21 committed on Oct 6, 2025

Commit

9a76d57

verified ·

1 Parent(s): b074117

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -294

app.py CHANGED Viewed

@@ -1,14 +1,10 @@
-import os
 import json
-import random
-import google.generativeai as genai
-from typing import List, Dict, Any
-# Configure Gemini API
-genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
-# IB Math topics list
-IB_TOPICS = [
     "SL 1.1 - Operations with numbers in the form a × 10k where 1 < a < 10 and k is an integer.",
     "SL 1.2 - Arithmetic sequences and series. Use of the formulae for the nth term and the sum of the first n terms of the sequence. Use of sigma notation for sums of arithmetic sequences. Applications. Analysis, interpretation and prediction where a model is not perfectly arithmetic in real life.",
     "SL 1.3 - Geometric sequences and series. Use of the formulae for the n th term and the sum of the first n terms of the sequence. Use of sigma notation for the sums of geometric sequences. Applications.",
@@ -92,299 +88,73 @@ IB_TOPICS = [
     "AHL 5.17 - Area of the region enclosed by a curve and the y-axis in a given interval. Volumes of revolution about the x-axis or y-axis.",
     "AHL 5.18 - First order differential equations. Numerical solution of dy/dx = f(x, y) using Euler's method. Variables separable. Homogeneous differential equation. Solution of y' + P(x)y = Q(x), using the integrating factor.",
     "AHL 5.19 - Maclaurin series to obtain expansions for eˣ, sinx, cosx, ln(1+x), (1+x)ᵖ, p∈Q. Use of simple substitution, products, integration and differentiation to obtain other series. Maclaurin series developed from differential equations"
-]
-def create_model():
-    """Create Gemini model with error handling."""
-    try:
-        model = genai.GenerativeModel("gemini-2.0-flash-exp", generation_config={"temperature": 0})
-        return model
-    except Exception as e:
-        try:
-            model = genai.GenerativeModel("gemini-1.5-flash", generation_config={"temperature": 0})
-            return model
-        except Exception as e2:
-            raise Exception(f"Failed to create Gemini model: {e2}")
-def identify_topics_with_gemini(qp_content: str, graded_as: str) -> Dict[str, Any]:
     """
-    Send QP content and grading results to Gemini to identify topics for each question.
-    """
-    model = create_model()
-    topics_list = "\n".join([f"- {topic}" for topic in IB_TOPICS])
-    prompt = f"""You are an IB Mathematics expert. Analyze the following question paper content and grading results to identify the specific IB Math topic for each question.
-QUESTION PAPER CONTENT:
-{qp_content}
-GRADING RESULTS:
-{graded_as}
-IB MATH TOPICS LIST:
-{topics_list}
-For each question in the question paper, identify which specific topic from the list above it belongs to. Return your analysis in JSON format:
-{{
-    "topic_analysis": [
-        {{
-            "question_id": "1",
-            "topic": "SL 2.6",
-            "confidence": "high",
-            "reasoning": "Question involves quadratic functions and finding vertex form"
-        }},
-        {{
-            "question_id": "2",
-            "topic": "SL 4.7",
-            "confidence": "medium",
-            "reasoning": "Question deals with discrete probability distributions"
-        }}
-    ],
-    "incorrect_topics": ["SL 2.6", "SL 4.7"],
-    "correct_topics": ["SL 1.2", "SL 3.4"]
-}}
-Focus on:
-1. Identifying the exact topic code from the provided list
-2. Determining which questions were answered incorrectly vs correctly based on the grading
-3. Providing clear reasoning for topic identification
-"""
-    try:
-        response = model.generate_content(prompt)
-        response_text = response.text
-        # Extract JSON from response
-        import re
-        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
-        if json_match:
-            return json.loads(json_match.group())
-        else:
-            raise ValueError("No JSON found in response")
-    except Exception as e:
-        print(f"Error in topic identification: {e}")
-        return {"topic_analysis": [], "incorrect_topics": [], "correct_topics": []}
-def load_questions_database(file_path: str = "merged_gemini_output.txt") -> List[Dict]:
-    """
-    Load questions from the database file.
-    Expected format: JSON objects with year, month, question_number, topic, content fields.
     """
-    questions = []
-    if not os.path.exists(file_path):
-        print(f"Warning: Questions database file '{file_path}' not found.")
-        return questions
-    try:
-        with open(file_path, 'r', encoding='utf-8') as f:
-            content = f.read()
-        # Parse the content - assuming it contains JSON objects
-        # Handle the format described in the prompt
-        import re
-        json_objects = re.findall(r'\{[^}]+\}', content)
-        for json_str in json_objects:
-            try:
-                question = json.loads(json_str)
-                questions.append(question)
-            except json.JSONDecodeError:
-                continue
-    except Exception as e:
-        print(f"Error loading questions database: {e}")
-    return questions
-def generate_smart_test(topic_analysis: Dict[str, Any], questions_db: List[Dict]) -> Dict[str, Any]:
-    """
-    Generate a smart test based on the topic analysis and available questions.
-    Recommended composition (8-question example):
-    1. 4 remediation items - each tied to specific concepts the student got wrong
-       - 2 items that are near-transfer (very similar to original wrong items, scaffolded)
-       - 2 items that are far-transfer/applied (same concept but in new context)
-    2. 2 retention items - on topics the student got right (check for forgetting)
-    3. 1 synthesis/higher-order item - combine multiple concepts
-    4. 1 quick confidence/metacognition item
-    """
-    incorrect_topics = topic_analysis.get("incorrect_topics", [])
-    correct_topics = topic_analysis.get("correct_topics", [])
-    # Group questions by topic
-    questions_by_topic = {}
-    for question in questions_db:
-        topic = question.get("topic", "")
-        if topic not in questions_by_topic:
-            questions_by_topic[topic] = []
-        questions_by_topic[topic].append(question)
-    test_questions = []
-    # 1. Remediation items (4 questions from incorrect topics)
-    remediation_questions = []
-    for topic in incorrect_topics[:2]:  # Focus on first 2 incorrect topics
-        if topic in questions_by_topic:
-            available = questions_by_topic[topic]
-            if len(available) >= 2:
-                # 1 near-transfer, 1 far-transfer per topic
-                selected = random.sample(available, min(2, len(available)))
-                remediation_questions.extend(selected)
-    # Ensure we have 4 remediation questions
-    while len(remediation_questions) < 4 and incorrect_topics:
-        for topic in incorrect_topics:
-            if topic in questions_by_topic and len(remediation_questions) < 4:
-                available = [q for q in questions_by_topic[topic] if q not in remediation_questions]
-                if available:
-                    remediation_questions.append(random.choice(available))
-    test_questions.extend(remediation_questions[:4])
-    # 2. Retention items (2 questions from correct topics)
-    retention_questions = []
-    for topic in correct_topics[:2]:
-        if topic in questions_by_topic:
-            available = [q for q in questions_by_topic[topic] if q not in test_questions]
-            if available:
-                retention_questions.append(random.choice(available))
-    test_questions.extend(retention_questions[:2])
-    # 3. Synthesis question (1 question combining concepts)
-    # For now, pick a complex question from available topics
-    synthesis_candidates = []
-    all_topics = list(set(incorrect_topics + correct_topics))
-    for topic in all_topics:
-        if topic in questions_by_topic:
-            available = [q for q in questions_by_topic[topic] if q not in test_questions]
-            synthesis_candidates.extend(available)
-    if synthesis_candidates:
-        test_questions.append(random.choice(synthesis_candidates))
-    # 4. Metacognition item (create a simple confidence question)
-    metacognition_question = {
-        "question_number": "META",
-        "topic": "Metacognition",
-        "content": "Rate your confidence level (1-5) for each of the questions above and briefly explain your reasoning for one question where you felt least confident.",
-        "year": "N/A",
-        "month": "N/A"
-    }
-    test_questions.append(metacognition_question)
-    return {
-        "test_questions": test_questions,
-        "composition": {
-            "remediation_items": len(remediation_questions),
-            "retention_items": len(retention_questions),
-            "synthesis_items": 1,
-            "metacognition_items": 1,
-            "total_questions": len(test_questions)
-        },
-        "focus_topics": {
-            "incorrect_topics": incorrect_topics,
-            "correct_topics": correct_topics
-        }
-    }
-def format_test_output(smart_test: Dict[str, Any]) -> str:
-    """Format the generated test for display."""
-    output = []
-    output.append("# SMART ADAPTIVE TEST")
-    output.append("=" * 50)
-    output.append("")
-    # Test composition summary
-    comp = smart_test["composition"]
-    output.append("## Test Composition:")
-    output.append(f"- Remediation items: {comp['remediation_items']}")
-    output.append(f"- Retention items: {comp['retention_items']}")
-    output.append(f"- Synthesis items: {comp['synthesis_items']}")
-    output.append(f"- Metacognition items: {comp['metacognition_items']}")
-    output.append(f"- Total questions: {comp['total_questions']}")
-    output.append("")
-    # Focus topics
-    focus = smart_test["focus_topics"]
-    output.append("## Focus Areas:")
-    output.append("### Topics to remediate:")
-    for topic in focus["incorrect_topics"]:
-        output.append(f"- {topic}")
-    output.append("")
-    output.append("### Topics to retain:")
-    for topic in focus["correct_topics"]:
-        output.append(f"- {topic}")
-    output.append("")
-    # Questions
-    output.append("## Test Questions:")
-    output.append("")
-    for i, question in enumerate(smart_test["test_questions"], 1):
-        output.append(f"### Question {i}")
-        output.append(f"**Topic:** {question.get('topic', 'N/A')}")
-        if question.get('year') != 'N/A':
-            output.append(f"**Source:** {question.get('month', '')} {question.get('year', '')}")
-        output.append("")
-        output.append(question.get('content', ''))
-        output.append("")
-        output.append("-" * 30)
-        output.append("")
-    return "\n".join(output)
-def smart_test_pipeline(qp_content: str, graded_as: str) -> str:
-    """
-    Main pipeline function that takes QP content and grading results,
-    identifies topics, and generates a smart adaptive test.
-    """
-    print("🔍 Starting smart test generation...")
-    # Step 1: Identify topics using Gemini
-    print("📊 Analyzing topics with Gemini...")
-    topic_analysis = identify_topics_with_gemini(qp_content, graded_as)
-    if not topic_analysis.get("topic_analysis"):
-        return "❌ Error: Could not analyze topics from the provided content."
-    print(f"✅ Identified {len(topic_analysis.get('incorrect_topics', []))} incorrect topics and {len(topic_analysis.get('correct_topics', []))} correct topics")
-    # Step 2: Load questions database
-    print("📚 Loading questions database...")
-    questions_db = load_questions_database()
-    if not questions_db:
-        return "❌ Error: No questions database found. Please ensure 'merged_gemini_output.txt' exists."
-    print(f"✅ Loaded {len(questions_db)} questions from database")
-    # Step 3: Generate smart test
-    print("🎯 Generating adaptive test...")
-    smart_test = generate_smart_test(topic_analysis, questions_db)
-    # Step 4: Format output
-    formatted_test = format_test_output(smart_test)
-    print("✅ Smart test generated successfully!")
-    return formatted_test
-if __name__ == "__main__":
-    # Example usage
-    sample_qp = """
-    Question 1: Solve the quadratic equation x² + 5x + 6 = 0
-    Question 2: A discrete random variable X has the probability distribution P(X = x) = cx(5-x) for x = 1,2,3,4. Find the value of c.
-    """
-    sample_graded = """
-    Question 1: Incorrect - Student got x = -2, -4 instead of x = -2, -3
-    Question 2: Correct - Student correctly found c = 1/30
-    """
-    result = smart_test_pipeline(sample_qp, sample_graded)
-    print(result)

+import gradio as gr
+import PyPDF2
 import json
+from some_llm_api import call_gemini_llm  # replace with actual API call
+# Predefined topic list (abbreviated for brevity; include all topics in practice)
+TOPICS = [
     "SL 1.1 - Operations with numbers in the form a × 10k where 1 < a < 10 and k is an integer.",
     "SL 1.2 - Arithmetic sequences and series. Use of the formulae for the nth term and the sum of the first n terms of the sequence. Use of sigma notation for sums of arithmetic sequences. Applications. Analysis, interpretation and prediction where a model is not perfectly arithmetic in real life.",
     "SL 1.3 - Geometric sequences and series. Use of the formulae for the n th term and the sum of the first n terms of the sequence. Use of sigma notation for the sums of geometric sequences. Applications.",
     "AHL 5.17 - Area of the region enclosed by a curve and the y-axis in a given interval. Volumes of revolution about the x-axis or y-axis.",
     "AHL 5.18 - First order differential equations. Numerical solution of dy/dx = f(x, y) using Euler's method. Variables separable. Homogeneous differential equation. Solution of y' + P(x)y = Q(x), using the integrating factor.",
     "AHL 5.19 - Maclaurin series to obtain expansions for eˣ, sinx, cosx, ln(1+x), (1+x)ᵖ, p∈Q. Use of simple substitution, products, integration and differentiation to obtain other series. Maclaurin series developed from differential equations"
+]
+def extract_pdf_text(pdf_file) -> str:
+    """Return the text of every page of a PDF as a single string.
+
+    Args:
+        pdf_file: The PDF to read; passed unchanged to ``PyPDF2.PdfReader``.
+
+    Returns:
+        The extracted text of each page in document order, each page
+        followed by a newline. Empty string when the PDF has no pages.
+    """
+    reader = PyPDF2.PdfReader(pdf_file)
+    # ``or ""`` keeps a page whose extract_text() yields None (or nothing)
+    # from raising TypeError during concatenation; joining once avoids
+    # rebuilding the accumulated string for every page.
+    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)
+def process_qp_and_graded(qp_file, graded_file):
+    """Identify the topic of each question, then generate a targeted 8-question test.
+
+    Both PDFs are converted to text with ``extract_pdf_text``. The question
+    paper text is sent to ``call_gemini_llm`` together with ``TOPICS`` to tag
+    each question; the tagged questions and the graded answers are then sent
+    in a second call that asks for the new test.
+
+    Args:
+        qp_file: Question paper PDF, forwarded to ``extract_pdf_text``.
+        graded_file: Graded answers PDF, forwarded to ``extract_pdf_text``.
+
+    Returns:
+        A 2-tuple of JSON-encoded strings: the result of the topic
+        identification call and the result of the test generation call.
+
+    NOTE(review): the return type of ``call_gemini_llm`` is not visible here
+    (it is imported from a placeholder module) - confirm whether it returns
+    raw text or parsed JSON.
+    """
+    # Step 1: Extract text
+    qp_text = extract_pdf_text(qp_file)
+    graded_text = extract_pdf_text(graded_file)
+    # Topic descriptions contain commas themselves, so a ", "-joined list has
+    # ambiguous boundaries; list one topic per line instead.
+    topics_block = "\n".join(f"- {topic}" for topic in TOPICS)
+    # Step 2: Call Gemini LLM to identify topics for each question
+    llm_prompt = f"""
+    You are a math expert. Identify the topic for each question in the following question paper
+    from this list:
+{topics_block}
+    Return JSON in the following format:
+    [
+      {{
+        "question_number": 1,
+        "topic": "SL 1.1",
+        "content": "The question text"
+      }},
+      ...
+    ]
+    Question paper text:
+    {qp_text}
     """
+    identified_questions = call_gemini_llm(llm_prompt)
+    # ensure_ascii=False keeps math symbols (e.g. the multiplication sign,
+    # superscripts) readable in the output instead of \uXXXX escapes.
+    questions_json = json.dumps(identified_questions, indent=2, ensure_ascii=False)
+    # A raw text reply is embedded as-is; a structured reply is embedded as
+    # JSON rather than as a Python repr.
+    if isinstance(identified_questions, str):
+        questions_for_prompt = identified_questions
+    else:
+        questions_for_prompt = questions_json
+    # Step 3: Generate new 8-question test based on student's graded answers
+    # Prompt LLM to generate new test using the recommended composition
+    llm_test_prompt = f"""
+    You are a math teacher. Based on the graded answers below:
+    {graded_text}
+    And the following questions with topics:
+    {questions_for_prompt}
+    Create a new 8-question test with the following composition:
+    - 4 remediation items based on wrong answers (2 near-transfer, 2 far-transfer)
+    - 2 retention items on topics the student got right
+    - 1 synthesis / higher-order item combining multiple concepts
+    - 1 confidence/metacognition item
+    Return JSON with question_number, topic, and content.
     """
+    new_test = call_gemini_llm(llm_test_prompt)
+    return questions_json, json.dumps(new_test, indent=2, ensure_ascii=False)
+# Gradio interface
+# The two uploads are listed in the order of process_qp_and_graded's
+# parameters (question paper, then graded answers).
+_pdf_inputs = [
+    gr.File(label="Question Paper PDF"),
+    gr.File(label="Graded Answers PDF"),
+]
+# One text box per string returned by process_qp_and_graded.
+_text_outputs = [
+    gr.Textbox(label="Questions with Topics", lines=20),
+    gr.Textbox(label="Generated 8-Question Test", lines=20),
+]
+iface = gr.Interface(
+    fn=process_qp_and_graded,
+    inputs=_pdf_inputs,
+    outputs=_text_outputs,
+    title="Math Question Topic Identifier & Test Generator",
+    description="Upload the student's question paper and graded answers PDFs. This app identifies question topics and generates a new targeted test.",
+)
+# Start the app as soon as this module is executed.
+iface.launch()