neurolearn

Sleeping

App Files Files Community

atz21 committed on Sep 18, 2025

Commit

7884d2d

verified ·

1 Parent(s): 9dcc575

Update app.py

Browse files

Files changed (1) hide show

app.py +90 -61

app.py CHANGED Viewed

@@ -1,15 +1,21 @@
 import os
 import gradio as gr
 import google.generativeai as genai
 from markdown_pdf import MarkdownPdf, Section
-import subprocess
 # ---------- PROMPTS ----------
 PROMPTS = {
     "ALIGNMENT_PROMPT": {
         "role": "system",
         "content": """Developer: Align QP, MS, and AS into structured JSON format.
 ## Instructions:
 - Each question must include:
   - `id` (question/sub-question number, e.g., "1", "2.a")
@@ -19,39 +25,22 @@ PROMPTS = {
   - `as` (student’s steps, numerical values, and notes)
 - Include `total_verification` in MS showing explicit mark breakdown.
 - The structure must be **valid JSON only**.
 ## Example JSON:
 {
   "questions": [
     {
       "id": "1",
-      "qp": "Ramiro walks to work each morning. During the first minute he walks 80 metres. In each subsequent minute he walks 90% of the distance walked during the previous minute.\\nThe distance between his house and work is 660 metres. Ramiro leaves his house at 08:00 and has to be at work by 08:15.\\nExplain why he will not be at work on time.",
       "total_marks": 7,
       "ms": {
         "marks": [
-          { "id": "M1_1", "desc": "Recognise that the distance each minute forms a geometric sequence; show r = 0.9 (method mark)." },
-          { "id": "M1_2", "desc": "Recognise that total distance is the sum of a geometric sequence and give the sum formula (method mark)." },
-          { "id": "M1_3", "desc": "List at least 5 correct terms of the GP (method mark)." },
-          { "id": "A1_list", "desc": "List all 15 correct terms (accuracy mark)." },
-          { "id": "M1_4", "desc": "Attempt to find S_15 (method mark)." },
-          { "id": "A1_sum", "desc": "Correct numerical value for S_15 ≈ 635.287 (accuracy mark)." },
-          { "id": "R1", "desc": "Conclude: since S < 660, he will not be there on time (requires preceding A mark)." }
         ],
-        "total_verification": "M1 + M1 + M1 + A1 + M1 + A1 + R1 = 7"
       },
       "as": {
-        "steps": [
-          "90% of 80 = 72 (2nd minute).",
-          "90% of 72 = 64.8 (3rd minute).",
-          "Sequence shown: 80, 72, 64.8, 58.32.",
-          "r = 72/80 = 0.9 ; also 64.8/72 = 0.9.",
-          "u_n = u_1 * r^(n-1).",
-          "S_n = u_1 * (r^n - 1)/(r - 1).",
-          "S_15 = 80 * (0.9^15 - 1)/(0.9 - 1).",
-          "S_15 = 635.29 (approx)."
-        ],
-        "numeric_S15": 635.29,
-        "notes": "Student found r and used the sum formula correctly, listed only 4 terms, got S15 ≈ 635.29 but did not explicitly state the final conclusion."
       }
     }
   ]
@@ -61,7 +50,6 @@ PROMPTS = {
     "GRADING_PROMPT": {
         "role": "system",
         "content": """Developer: You are an official examiner. Apply the following grading rules precisely.
 ### Abbreviations:
 - **M**: Marks for Method
 - **A**: Marks for Accuracy/Answer
@@ -69,7 +57,6 @@ PROMPTS = {
 - **AG**: Answer given in question—no marks
 - **FT**: Follow Through marks (if error carried forward correctly)
 - **MR**: Deduct for misread (once only)
 ---
 ## Grading Instructions
 1. Award marks using official annotations (e.g., M1, A2).
@@ -79,51 +66,41 @@ PROMPTS = {
 5. Apply FT where appropriate.
 6. Use proper notation: M1A0, A1, etc.
 7. Any lost mark: use red `<span style="color:red">M0</span>` and make Reason red.
 ---
 ## Output Format
 Produce two sections per question/sub-question:
 ---
-## Question X (and sub-question if applicable)
 ### Markscheme vs Student Answer
 | Mark ID | Markscheme Expectation | Student’s Response | Awarded |
 |---------|------------------------|--------------------|---------|
-| M1_1    | Recognise GP, r=0.9    | "r = 72/80 = 0.9" | M1 |
-| M1_2    | Sum formula for GP     | "S_n = u1(r^n-1)/(r-1)" | M1 |
-| A1_list | 15 terms listed        | Only 4 terms shown | <span style="color:red">A0</span> |
-| …       | …                      | …                  | … |
 ➡️ **Total: 6/7**
 ---
 ### Examiner’s Report
 At the very end, provide a summary table:
 | Question Number | Marks | Remark |
 |-----------------|-------|--------|
 | 1               | 6/7   | C      |
-| 2.a             | 9/9   | A      |
 Then show total clearly:
-`Total: 15/16`"""
     }
 }
 # -------------------- CONFIG --------------------
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
-# ---------- HELPER: Save to PDF ----------
 def save_as_pdf(text, filename="output.pdf"):
     pdf = MarkdownPdf()
     pdf.add_section(Section(text, toc=False))
     pdf.save(filename)
     return filename
-# ---------- HELPER: Compress PDF ----------
 def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
     if output_path is None:
         base, ext = os.path.splitext(input_path)
         output_path = f"{base}_compressed{ext}"
@@ -141,27 +118,72 @@ def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
         ]
         subprocess.run(gs_cmd, check=True)
         if os.path.getsize(output_path) <= max_size:
-            print(f"✅ Compressed {input_path} → {output_path}")
             return output_path
         else:
-            print(f"⚠️ Compression failed to reduce below {max_size/1024/1024} MB")
             return input_path
-    except Exception as e:
-        print(f"⚠️ Compression error: {e}")
         return input_path
-# ---------- HELPER: Create Model with Fallback ----------
 def create_model():
     try:
-        print("⚡ Using gemini-2.5-pro model")
         return genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
     except Exception:
-        print("⚡ Falling back to gemini-2.5-flash model")
         return genai.GenerativeModel("gemini-2.5-flash", generation_config={"temperature": 0})
-# ---------- PIPELINE: ALIGN + GRADE ----------
-def align_and_grade(qp_file, ms_file, ans_file):
     try:
         qp_file = compress_pdf(qp_file, "qp_compressed.pdf")
         ms_file = compress_pdf(ms_file, "ms_compressed.pdf")
         ans_file = compress_pdf(ans_file, "ans_compressed.pdf")
@@ -172,7 +194,7 @@ def align_and_grade(qp_file, ms_file, ans_file):
         model = create_model()
-        # ---------------- STEP 1: ALIGN (JSON only) ----------------
         resp = model.generate_content([
             PROMPTS["ALIGNMENT_PROMPT"]["content"],
             qp_uploaded,
@@ -183,7 +205,7 @@ def align_and_grade(qp_file, ms_file, ans_file):
         if not json_output and resp.candidates:
             json_output = resp.candidates[0].content.parts[0].text
-        # ---------------- STEP 2: GRADING (Markdown + PDF) ----------------
         response = model.generate_content([
             PROMPTS["GRADING_PROMPT"]["content"],
             json_output
@@ -195,21 +217,27 @@ def align_and_grade(qp_file, ms_file, ans_file):
         base_name = os.path.splitext(os.path.basename(ans_file))[0]
         grading_pdf_path = save_as_pdf(grading, f"{base_name}_graded.pdf")
-        # Return JSON (alignment), Markdown grading, and PDF
-        return json_output, grading, grading_pdf_path
     except Exception as e:
-        return f"❌ Error: {e}", None, None
 # ---------- GRADIO APP ----------
-with gr.Blocks(title="LeadIB AI Grading (Alignment + Auto-Grading)") as demo:
-    gr.Markdown("## 📘 LeadIB AI Grading\nUpload **Question Paper**, **Markscheme**, and **Student Answer Sheet**.\nThe system will first align into JSON, then auto-grade with detailed feedback.")
     with gr.Row():
         qp_file = gr.File(label="📄 Upload Question Paper (PDF)")
         ms_file = gr.File(label="📄 Upload Markscheme (PDF)")
         ans_file = gr.File(label="📝 Upload Student Answer Sheet (PDF)")
     run_button = gr.Button("🚀 Run Alignment + Grading")
     with gr.Row():
@@ -217,11 +245,12 @@ with gr.Blocks(title="LeadIB AI Grading (Alignment + Auto-Grading)") as demo:
         grading_output = gr.Textbox(label="📝 Step 2: Grading (Markdown)", lines=20)
     grading_pdf = gr.File(label="📥 Download Grading PDF")
     run_button.click(
         fn=align_and_grade,
-        inputs=[qp_file, ms_file, ans_file],
-        outputs=[json_output, grading_output, grading_pdf]
     )
 if __name__ == "__main__":

 import os
+import re
+import json
+import subprocess
+import cv2
+import numpy as np
+import img2pdf
 import gradio as gr
 import google.generativeai as genai
 from markdown_pdf import MarkdownPdf, Section
+from pdf2image import convert_from_path
+from PIL import Image
 # ---------- PROMPTS ----------
 PROMPTS = {
     "ALIGNMENT_PROMPT": {
         "role": "system",
         "content": """Developer: Align QP, MS, and AS into structured JSON format.
 ## Instructions:
 - Each question must include:
   - `id` (question/sub-question number, e.g., "1", "2.a")
   - `as` (student’s steps, numerical values, and notes)
 - Include `total_verification` in MS showing explicit mark breakdown.
 - The structure must be **valid JSON only**.
 ## Example JSON:
 {
   "questions": [
     {
       "id": "1",
+      "qp": "Ramiro walks to work each morning...",
       "total_marks": 7,
       "ms": {
         "marks": [
+          { "id": "M1_1", "desc": "Recognise GP (r=0.9)" }
         ],
+        "total_verification": "M1 + A1 = 2"
       },
       "as": {
+        "steps": ["..."],
+        "notes": "..."
       }
     }
   ]
     "GRADING_PROMPT": {
         "role": "system",
         "content": """Developer: You are an official examiner. Apply the following grading rules precisely.
 ### Abbreviations:
 - **M**: Marks for Method
 - **A**: Marks for Accuracy/Answer
 - **AG**: Answer given in question—no marks
 - **FT**: Follow Through marks (if error carried forward correctly)
 - **MR**: Deduct for misread (once only)
 ---
 ## Grading Instructions
 1. Award marks using official annotations (e.g., M1, A2).
 5. Apply FT where appropriate.
 6. Use proper notation: M1A0, A1, etc.
 7. Any lost mark: use red `<span style="color:red">M0</span>` and make Reason red.
 ---
 ## Output Format
 Produce two sections per question/sub-question:
 ---
+## Question X
 ### Markscheme vs Student Answer
 | Mark ID | Markscheme Expectation | Student’s Response | Awarded |
 |---------|------------------------|--------------------|---------|
+| M1_1    | Recognise GP           | "r=0.9"            | M1 |
 ➡️ **Total: 6/7**
 ---
 ### Examiner’s Report
 At the very end, provide a summary table:
 | Question Number | Marks | Remark |
 |-----------------|-------|--------|
 | 1               | 6/7   | C      |
 Then show total clearly:
+`Total: 6/7`"""
     }
 }
 # -------------------- CONFIG --------------------
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
+GRID_ROWS, GRID_COLS = 20, 14  # grid for imprint placement
+# ---------- HELPERS ----------
 def save_as_pdf(text, filename="output.pdf"):
     """Write ``text`` to a PDF file via ``MarkdownPdf`` and return its path.

     Args:
         text: Content placed in a single ``Section`` (created with
             ``toc=False``).
         filename: Destination path of the PDF; defaults to ``output.pdf``.

     Returns:
         The ``filename`` that was passed in, unchanged.
     """
     document = MarkdownPdf()
     body = Section(text, toc=False)
     document.add_section(body)
     document.save(filename)
     return filename
 def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
+    """Compress PDF only if larger than max_size (20MB default)."""
     if output_path is None:
         base, ext = os.path.splitext(input_path)
         output_path = f"{base}_compressed{ext}"
         ]
         subprocess.run(gs_cmd, check=True)
         if os.path.getsize(output_path) <= max_size:
             return output_path
         else:
             return input_path
+    except Exception:
         return input_path
 def create_model():
     """Build the Gemini model used by the pipeline, with temperature fixed at 0.

     Tries ``gemini-2.5-pro`` first; if constructing it raises, returns a
     ``gemini-2.5-flash`` model with the same generation settings instead.

     Returns:
         A ``genai.GenerativeModel`` instance.
     """
     # NOTE(review): the fallback only triggers when the constructor itself
     # raises; presumably quota/availability errors surface later, at
     # generate_content time -- confirm against the SDK.
     settings = {"temperature": 0}
     try:
         primary = genai.GenerativeModel("gemini-2.5-pro", generation_config=settings)
     except Exception:
         return genai.GenerativeModel("gemini-2.5-flash", generation_config=settings)
     return primary
+# ---------- Extract marks per question ----------
# Question identifier at the start of a section, e.g. "1", "2.b" or "1(a)".
_QUESTION_ID = re.compile(r"([\d\.a-zA-Z\(\)]+)")
# One or more mark annotations written back to back, e.g. "M1" or "M1A0".
# The word boundaries keep identifiers such as "M1_1" from matching.
_MARK_RUN = re.compile(r"\b(?:[MAR]\d+)+\b")
# A single mark annotation inside a run.
_SINGLE_MARK = re.compile(r"[MAR]\d+")


def _awarded_column_cells(block):
    """Return the last cell of every Markdown table row (``| ... |``) in ``block``."""
    cells = []
    for line in block.splitlines():
        row = line.strip()
        if not row.startswith("|"):
            continue
        # Drop exactly one closing pipe so an empty last cell stays empty.
        if row.endswith("|"):
            row = row[:-1]
        cells.append(row.rsplit("|", 1)[-1])
    return cells


def extract_marks_from_grading(grading_text):
    """Collect the awarded mark annotations for each question of a grading report.

    The report is split on ``## Question`` headings. Every section whose
    heading starts with a question identifier contributes one entry.

    Marks are read from the last column of the section's Markdown table rows,
    which is the "Awarded" column in the format requested by the grading
    prompt. Reading only that column keeps the "Mark ID" and expectation
    columns from being counted as awarded marks. A section without any table
    row is scanned as a whole instead.

    Combined annotations such as ``M1A0`` are split into ``M1`` and ``A0``.

    Args:
        grading_text: Markdown grading report; ``None`` or an empty string
            yields an empty result.

    Returns:
        ``{"grading": [{"question": <id>, "marks_awarded": [<mark>, ...]}, ...]}``
        in order of appearance.
    """
    grading_json = {"grading": []}
    if not grading_text:
        return grading_json

    # The first chunk is whatever precedes the first question heading.
    for block in re.split(r"## Question\s+", grading_text)[1:]:
        q_match = _QUESTION_ID.match(block.strip())
        if not q_match:
            continue

        cells = _awarded_column_cells(block)
        awarded = []
        for chunk in cells if cells else [block]:
            for run in _MARK_RUN.findall(chunk):
                awarded.extend(_SINGLE_MARK.findall(run))

        grading_json["grading"].append({
            "question": q_match.group(1).strip(),
            "marks_awarded": awarded,
        })
    return grading_json
+# ---------- Imprinting Logic ----------
def imprint_marks(pdf_path, grading_json, output_pdf):
    """Stamp the awarded marks onto every page of a PDF and save the result.

    Each page of ``pdf_path`` is rasterised at 200 dpi, and one text line per
    graded question (``"<question>: <mark>,<mark>,..."``) is drawn in red,
    stacked from the top-left corner. The same list is drawn on every page.
    The annotated pages are then combined into ``output_pdf``.

    Intermediate page images are written to a temporary directory that is
    removed when the function finishes. Fixed file names in the working
    directory would collide between concurrent requests and pile up on disk.

    Args:
        pdf_path: Path of the PDF to annotate.
        grading_json: Mapping with a ``"grading"`` list whose items provide
            ``"question"`` and ``"marks_awarded"`` (a list of strings).
        output_pdf: Path the annotated PDF is written to.

    Returns:
        The result of ``compress_pdf(output_pdf)``.
    """
    import tempfile  # local import: only this helper needs scratch space

    # The labels are identical for every page, so build them once.
    labels = [
        f"{g['question']}: {','.join(g['marks_awarded'])}"
        for g in grading_json["grading"]
    ]

    pages = convert_from_path(pdf_path, dpi=200)
    with tempfile.TemporaryDirectory(prefix="imprint_") as workdir:
        annotated_pages = []
        for idx, page in enumerate(pages, start=1):
            # OpenCV draws on BGR arrays, so (0, 0, 255) below is red.
            img = cv2.cvtColor(np.array(page.convert("RGB")), cv2.COLOR_RGB2BGR)

            y_offset = 100  # baseline of the first label
            for label in labels:
                # Simple placement: stack the labels vertically.
                cv2.putText(img, label,
                            (50, y_offset),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            1.2, (0, 0, 255), 3, cv2.LINE_AA)
                y_offset += 50

            annotated_path = os.path.join(workdir, f"annotated_{idx}.png")
            cv2.imwrite(annotated_path, img)
            annotated_pages.append(annotated_path)

        # Must run before the temporary directory is removed: img2pdf reads
        # the page images from disk.
        with open(output_pdf, "wb") as f:
            f.write(img2pdf.convert(annotated_pages))

    return compress_pdf(output_pdf)
+# ---------- PIPELINE ----------
+def align_and_grade(qp_file, ms_file, ans_file, imprint=False):
     try:
+        # Compress only if >20MB
         qp_file = compress_pdf(qp_file, "qp_compressed.pdf")
         ms_file = compress_pdf(ms_file, "ms_compressed.pdf")
         ans_file = compress_pdf(ans_file, "ans_compressed.pdf")
         model = create_model()
+        # ---- Step 1: ALIGN (JSON only)
         resp = model.generate_content([
             PROMPTS["ALIGNMENT_PROMPT"]["content"],
             qp_uploaded,
         if not json_output and resp.candidates:
             json_output = resp.candidates[0].content.parts[0].text
+        # ---- Step 2: GRADING (Markdown)
         response = model.generate_content([
             PROMPTS["GRADING_PROMPT"]["content"],
             json_output
         base_name = os.path.splitext(os.path.basename(ans_file))[0]
         grading_pdf_path = save_as_pdf(grading, f"{base_name}_graded.pdf")
+        # ---- Step 3 (Optional): Imprint marks on answer PDF ----
+        imprint_pdf_path = None
+        if imprint:
+            grading_json = extract_marks_from_grading(grading)
+            imprint_pdf_path = imprint_marks(ans_file, grading_json, f"{base_name}_imprinted.pdf")
+        return json_output, grading, grading_pdf_path, imprint_pdf_path
     except Exception as e:
+        return f"❌ Error: {e}", None, None, None
 # ---------- GRADIO APP ----------
+with gr.Blocks(title="LeadIB AI Grading (Alignment + Auto-Grading + Imprint)") as demo:
+    gr.Markdown("## 📘 LeadIB AI Grading\nUpload **Question Paper**, **Markscheme**, and **Student Answer Sheet**.\nSystem aligns → grades → optionally imprints marks.")
     with gr.Row():
         qp_file = gr.File(label="📄 Upload Question Paper (PDF)")
         ms_file = gr.File(label="📄 Upload Markscheme (PDF)")
         ans_file = gr.File(label="📝 Upload Student Answer Sheet (PDF)")
+    imprint_toggle = gr.Checkbox(label="✍ Imprint Marks on Student Answer Sheet", value=False)
     run_button = gr.Button("🚀 Run Alignment + Grading")
     with gr.Row():
         grading_output = gr.Textbox(label="📝 Step 2: Grading (Markdown)", lines=20)
     grading_pdf = gr.File(label="📥 Download Grading PDF")
+    imprint_pdf = gr.File(label="📥 Download Imprinted PDF (Optional)")
     run_button.click(
         fn=align_and_grade,
+        inputs=[qp_file, ms_file, ans_file, imprint_toggle],
+        outputs=[json_output, grading_output, grading_pdf, imprint_pdf]
     )
 if __name__ == "__main__":