neurolearn

Sleeping

App Files Files Community

atz21 committed on Sep 15, 2025

Commit

db86cb5

verified ·

1 Parent(s): cda728b

Update app.py

Browse files

Files changed (1) hide show

app.py +109 -76

app.py CHANGED Viewed

@@ -7,6 +7,11 @@ import json
 import traceback
 import re
 import concurrent.futures
 # ---------- PROMPTS ----------
 PROMPTS = {
@@ -20,17 +25,14 @@ Each object must have exactly these keys:
 - "qp": string (exact question text or "[Not found]")
 - "ms": string (relevant markscheme text or "[Not found]")
 - "as": string (final cleaned student answer; "[No response]" or "[illegible]" if needed)
 ### Numbering Rules
 - Always use **logical order of questions** (1, 2, 3, …) regardless of how they are labeled in the PDF.
 - If the QP shows a mismatch (e.g., under "Question 1" the serial number says "12"), **still treat it as Q1**.
 - Subparts must be written in standard form (e.g., "1(a)", "1(b)(ii)").
 ### Formatting Rules
 - Preserve math inside fenced code ```...```.
 - If diagram/graph missing, write "[Graph omitted]".
 - Do not add extra commentary outside JSON.
 ## Example
 [
   {
@@ -52,7 +54,6 @@ Each object must have exactly these keys:
 - **AG**: Answer given in question—no marks
 - **FT**: Follow Through marks (if error carried forward correctly)
 - **MR**: Deduct for misread (once only)
 ---
 ## Grading Instructions
 1. Award marks using official annotations (e.g., M1, A2).
@@ -62,16 +63,13 @@ Each object must have exactly these keys:
 5. Apply FT where appropriate.
 6. Use proper notation: M1A0, A1, etc.
 7. Any lost mark: use red `<span style="color:red">M0</span>` and make Reason red.
 ---
 ## Output Format
 1. Produce a GitHub-flavored Markdown table with columns:
    | Student wrote | Marks Awarded | Reason |
    - Each row = one markable step/point, in order.
    - For blanks: “(no answer)” with marks lost.
 2. After the table, write ONLY one line for total marks in the form:  Final Marks: X / Y
 ⚠️ Do NOT include summaries, error classifications, or extra commentary.
 Only table + final marks line.
 """
@@ -83,7 +81,6 @@ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 # ---------- HELPER: Save to PDF ----------
 def save_as_pdf(text, filename="output.pdf"):
-    print(f"📄 Saving grading report to PDF: {filename}")
     pdf = MarkdownPdf()
     pdf.add_section(Section(text, toc=False))
     pdf.save(filename)
@@ -91,17 +88,14 @@ def save_as_pdf(text, filename="output.pdf"):
 # ---------- HELPER: Compress PDF ----------
 def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
-    print(f"🗜️ Checking if compression needed for {input_path}...")
     if output_path is None:
         base, ext = os.path.splitext(input_path)
         output_path = f"{base}_compressed{ext}"
     if os.path.getsize(input_path) <= max_size:
-        print("✅ No compression needed")
         return input_path
     try:
-        print(f"⚡ Compressing {input_path} → {output_path}")
         gs_cmd = [
             "gs", "-sDEVICE=pdfwrite",
             "-dCompatibilityLevel=1.4",
@@ -110,24 +104,18 @@ def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
             f"-sOutputFile={output_path}", input_path
         ]
         subprocess.run(gs_cmd, check=True)
         if os.path.getsize(output_path) <= max_size:
-            print("✅ Compression successful")
             return output_path
         else:
-            print("⚠️ Compression did not reduce size enough, using original")
             return input_path
-    except Exception as e:
-        print(f"❌ Compression failed: {e}")
         return input_path
 # ---------- HELPER: Create Model with Fallback ----------
 def create_model():
     try:
-        print("⚡ Using gemini-2.5-pro model")
         return genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
     except Exception:
-        print("⚡ Falling back to gemini-2.5-flash model")
         return genai.GenerativeModel("gemini-2.5-flash", generation_config={"temperature": 0})
 # ---------- HELPER: Clean JSON Output ----------
@@ -141,26 +129,21 @@ def clean_json_output(raw_text: str) -> str:
     return cleaned
 # ---------- PIPELINE: ALIGN + GRADE ----------
-def align_and_grade(qp_file, ms_file, ans_file):
     try:
-        print("🚀 Starting alignment + grading pipeline")
-        # Step 0: Compress if needed
-        print("🔍 Step 0: Compressing PDFs (if needed)")
         qp_file = compress_pdf(qp_file, "qp_compressed.pdf")
         ms_file = compress_pdf(ms_file, "ms_compressed.pdf")
         ans_file = compress_pdf(ans_file, "ans_compressed.pdf")
         # Step 1: Uploads
-        print("📤 Step 1: Uploading PDFs to Gemini...")
         qp_uploaded = genai.upload_file(path=qp_file, display_name="Question Paper")
         ms_uploaded = genai.upload_file(path=ms_file, display_name="Markscheme")
         ans_uploaded = genai.upload_file(path=ans_file, display_name="Answer Sheet")
         model = create_model()
-        # Step 2: Alignment → JSON
-        print("🧩 Step 2: Aligning QP, MS, and AS into JSON...")
         resp = model.generate_content([
             PROMPTS["ALIGNMENT_PROMPT"]["content"],
             qp_uploaded,
@@ -172,21 +155,11 @@ def align_and_grade(qp_file, ms_file, ans_file):
             aligned_json = resp.candidates[0].content.parts[0].text
         aligned_json = clean_json_output(aligned_json)
-        try:
-            questions = json.loads(aligned_json)
-            print(f"✅ Parsed JSON with {len(questions)} questions")
-        except Exception as e:
-            print("❌ JSON parsing failed")
-            traceback.print_exc()
-            return f"❌ JSON parsing error: {e}", None, None
-        # Step 3: Grading (parallelized but order preserved)
-        print("📝 Step 3: Grading each question in parallel...")
         def grade_one(idx_q):
             idx, q = idx_q
-            print(f"   ➡️ Grading Question {q['question_number']}")
             q_json = json.dumps(q, indent=2)
             response = model.generate_content([
                 PROMPTS["GRADING_PROMPT"]["content"],
@@ -199,61 +172,120 @@ def align_and_grade(qp_file, ms_file, ans_file):
         with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
             results = list(executor.map(grade_one, enumerate(questions)))
-        # Sort results back into original order
         results.sort(key=lambda x: x[0])
-        # Step 4: Build report and marks summary
         grading_sections = []
-        marks_summary = []
-        total_awarded, total_possible = 0, 0
         for _, qnum, grading_piece in results:
             section = f"## Question {qnum}\n\n{grading_piece}"
             grading_sections.append(section)
-            # Extract marks from "Final Marks: X / Y"
-            match = re.search(r"Final Marks:\s*(\d+)\s*/\s*(\d+)", grading_piece)
-            if match:
-                awarded, possible = int(match.group(1)), int(match.group(2))
-                marks_summary.append((qnum, awarded, possible))
-                total_awarded += awarded
-                total_possible += possible
-            else:
-                marks_summary.append((qnum, 0, 0))
-        # Build summary table
-        summary_table = ["\n\n# Final Marks Summary\n",
-            "| Question | Marks Awarded | Total Marks |",
-            "|----------|---------------|-------------|"]
-        for qnum, awarded, possible in marks_summary:
-            summary_table.append(f"| {qnum} | {awarded} | {possible} |")
-        summary_table.append(f"| **Total** | **{total_awarded}** | **{total_possible}** |")
-        grading_report = "\n\n".join(grading_sections) + "\n".join(summary_table)
-        # Step 5: Save grading report (Markdown → PDF)
-        print("📄 Step 5: Saving grading report to PDF...")
         base_name = os.path.splitext(os.path.basename(ans_file))[0]
         grading_pdf_path = save_as_pdf(grading_report, f"{base_name}_graded.pdf")
-        print("✅ Pipeline completed successfully")
-        return json.dumps(questions, indent=2), grading_report, grading_pdf_path
     except Exception as e:
-        print("❌ Fatal error in pipeline")
         traceback.print_exc()
-        return f"❌ Error: {e}", None, None
 # ---------- GRADIO APP ----------
-with gr.Blocks(title="LeadIB AI Grading (JSON Alignment + Auto-Grading)") as demo:
-    gr.Markdown("## LeadIB AI Grading\nUpload Question Paper, Markscheme, and Student Answer Sheet.\nThe system will align (as JSON) and grade automatically.")
     with gr.Row():
         qp_file = gr.File(label="Upload Question Paper (PDF)", type="filepath")
         ms_file = gr.File(label="Upload Markscheme (PDF)", type="filepath")
         ans_file = gr.File(label="Upload Student Answer Sheet (PDF)", type="filepath")
     run_btn = gr.Button("Start Alignment + Auto-Grading")
     with gr.Row():
@@ -261,16 +293,17 @@ with gr.Blocks(title="LeadIB AI Grading (JSON Alignment + Auto-Grading)") as dem
     with gr.Row():
         grading_out = gr.Textbox(label="✅ Grading Report (Markdown)", lines=20)
         grading_pdf = gr.File(label="⬇️ Download Grading Report (PDF)")
     run_btn.click(
         fn=align_and_grade,
-        inputs=[qp_file, ms_file, ans_file],
-        outputs=[aligned_out, grading_out, grading_pdf],
         show_progress=True
     )
 if __name__ == "__main__":
     demo.launch()

 import traceback
 import re
 import concurrent.futures
+from pdf2image import convert_from_path
+from PIL import Image, ImageDraw, ImageFont
+import cv2
+import numpy as np
+import img2pdf
 # ---------- PROMPTS ----------
 PROMPTS = {
 - "qp": string (exact question text or "[Not found]")
 - "ms": string (relevant markscheme text or "[Not found]")
 - "as": string (final cleaned student answer; "[No response]" or "[illegible]" if needed)
 ### Numbering Rules
 - Always use **logical order of questions** (1, 2, 3, …) regardless of how they are labeled in the PDF.
 - If the QP shows a mismatch (e.g., under "Question 1" the serial number says "12"), **still treat it as Q1**.
 - Subparts must be written in standard form (e.g., "1(a)", "1(b)(ii)").
 ### Formatting Rules
 - Preserve math inside fenced code ```...```.
 - If diagram/graph missing, write "[Graph omitted]".
 - Do not add extra commentary outside JSON.
 ## Example
 [
   {
 - **AG**: Answer given in question—no marks
 - **FT**: Follow Through marks (if error carried forward correctly)
 - **MR**: Deduct for misread (once only)
 ---
 ## Grading Instructions
 1. Award marks using official annotations (e.g., M1, A2).
 5. Apply FT where appropriate.
 6. Use proper notation: M1A0, A1, etc.
 7. Any lost mark: use red `<span style="color:red">M0</span>` and make Reason red.
 ---
 ## Output Format
 1. Produce a GitHub-flavored Markdown table with columns:
    | Student wrote | Marks Awarded | Reason |
    - Each row = one markable step/point, in order.
    - For blanks: “(no answer)” with marks lost.
 2. After the table, write ONLY one line for total marks in the form:  Final Marks: X / Y
 ⚠️ Do NOT include summaries, error classifications, or extra commentary.
 Only table + final marks line.
 """
 # ---------- HELPER: Save to PDF ----------
 def save_as_pdf(text, filename="output.pdf"):
     pdf = MarkdownPdf()
     pdf.add_section(Section(text, toc=False))
     pdf.save(filename)
 # ---------- HELPER: Compress PDF ----------
 def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
     if output_path is None:
         base, ext = os.path.splitext(input_path)
         output_path = f"{base}_compressed{ext}"
     if os.path.getsize(input_path) <= max_size:
         return input_path
     try:
         gs_cmd = [
             "gs", "-sDEVICE=pdfwrite",
             "-dCompatibilityLevel=1.4",
             f"-sOutputFile={output_path}", input_path
         ]
         subprocess.run(gs_cmd, check=True)
         if os.path.getsize(output_path) <= max_size:
             return output_path
         else:
             return input_path
+    except Exception:
         return input_path
 # ---------- HELPER: Create Model with Fallback ----------
 def create_model():
     """Build the Gemini model used by the pipeline, with temperature set to 0.
     "gemini-2.5-pro" is attempted first; if constructing it raises, a
     "gemini-2.5-flash" model with the same generation config is returned.
     """
     try:
         chosen = genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
     except Exception:
         # Any construction failure falls through to the lighter model.
         chosen = genai.GenerativeModel("gemini-2.5-flash", generation_config={"temperature": 0})
     return chosen
 # ---------- HELPER: Clean JSON Output ----------
     return cleaned
 # ---------- PIPELINE: ALIGN + GRADE ----------
+def align_and_grade(qp_file, ms_file, ans_file, imprint=False):
     try:
+        # Step 0: Compress
         qp_file = compress_pdf(qp_file, "qp_compressed.pdf")
         ms_file = compress_pdf(ms_file, "ms_compressed.pdf")
         ans_file = compress_pdf(ans_file, "ans_compressed.pdf")
         # Step 1: Uploads
         qp_uploaded = genai.upload_file(path=qp_file, display_name="Question Paper")
         ms_uploaded = genai.upload_file(path=ms_file, display_name="Markscheme")
         ans_uploaded = genai.upload_file(path=ans_file, display_name="Answer Sheet")
         model = create_model()
+        # Step 2: Alignment
         resp = model.generate_content([
             PROMPTS["ALIGNMENT_PROMPT"]["content"],
             qp_uploaded,
             aligned_json = resp.candidates[0].content.parts[0].text
         aligned_json = clean_json_output(aligned_json)
+        questions = json.loads(aligned_json)
+        # Step 3: Grading
         def grade_one(idx_q):
             idx, q = idx_q
             q_json = json.dumps(q, indent=2)
             response = model.generate_content([
                 PROMPTS["GRADING_PROMPT"]["content"],
         with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
             results = list(executor.map(grade_one, enumerate(questions)))
         results.sort(key=lambda x: x[0])
+        # Step 4: Build report
         grading_sections = []
+        grading_json = {"grading": []}
         for _, qnum, grading_piece in results:
             section = f"## Question {qnum}\n\n{grading_piece}"
             grading_sections.append(section)
+            # Extract marks list
+            marks_list = re.findall(r"(M[01]|A[0-9]|R[01])", grading_piece)
+            grading_json["grading"].append({"question": qnum, "marks_awarded": marks_list})
+        grading_report = "\n\n".join(grading_sections)
         base_name = os.path.splitext(os.path.basename(ans_file))[0]
         grading_pdf_path = save_as_pdf(grading_report, f"{base_name}_graded.pdf")
+        imprint_pdf_path = None
+        if imprint:
+            imprint_pdf_path = imprint_marks(ans_file, grading_json, model)
+        return json.dumps(questions, indent=2), grading_report, grading_pdf_path, imprint_pdf_path
     except Exception as e:
         traceback.print_exc()
+        return f"❌ Error: {e}", None, None, None
+# ---------- PIPELINE: IMPRINT MARKS ----------
+def imprint_marks(ans_pdf, grading_json, model, grid_rows=20, grid_cols=14):
+    output_dir = "grid_pages"
+    os.makedirs(output_dir, exist_ok=True)
+    pages = convert_from_path(ans_pdf, dpi=200)
+    page_images = []
+    # Create grid images
+    for i, page in enumerate(pages):
+        img_path = os.path.join(output_dir, f"page_{i+1}_grid.png")
+        img = page.convert("RGB")
+        draw = ImageDraw.Draw(img)
+        w, h = img.size
+        cell_w, cell_h = w / grid_cols, h / grid_rows
+        try:
+            num_font = ImageFont.truetype("arial.ttf", 20)
+        except IOError:
+            num_font = ImageFont.load_default()
+        cell_num = 1
+        for r in range(grid_rows):
+            for c in range(grid_cols):
+                x = int(c * cell_w + cell_w / 2)
+                y = int(r * cell_h + cell_h / 2)
+                text = str(cell_num)
+                bbox = draw.textbbox((0, 0), text, font=num_font)
+                tw = bbox[2] - bbox[0]
+                th = bbox[3] - bbox[1]
+                draw.text((x - tw/2, y - th/2), text, fill="black", font=num_font)
+                cell_num += 1
+        img.save(img_path, "PNG")
+        page_images.append(img_path)
+    annotated_pages = []
+    for idx, page in enumerate(pages):
+        # Ask Gemini for mapping
+        prompt = f"""
+You are an exam marker. The page is divided into a {grid_rows} x {grid_cols} grid with numbered cells.
+Return JSON: [{{"question": "1(a)", "cell_number": 15}}, ...]
+Grading JSON:
+{json.dumps(grading_json, indent=2)}
+"""
+        response = model.generate_content([prompt, Image.open(page_images[idx])])
+        mapping_text = getattr(response, "text", "")
+        match = re.search(r'\[.*\]', mapping_text, re.DOTALL)
+        mapping = json.loads(match.group(0)) if match else []
+        # Annotate
+        img = np.array(page.convert("RGB"))
+        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+        h, w, _ = img.shape
+        cell_w, cell_h = w / grid_cols, h / grid_rows
+        for item in mapping:
+            q = item["question"]
+            cell_number = item["cell_number"]
+            row = (cell_number - 1) // grid_cols
+            col = (cell_number - 1) % grid_cols
+            marks_list = next((g["marks_awarded"] for g in grading_json["grading"] if g["question"] == q), [])
+            marks_text = ",".join(marks_list)
+            x_c = int((col+1) * cell_w - cell_w/4)
+            y_c = int((row+0.5) * cell_h)
+            cv2.putText(img, marks_text, (x_c, y_c), cv2.FONT_HERSHEY_SIMPLEX,
+                        1.5, (0, 0, 255), 3, cv2.LINE_AA)
+        annotated_path = os.path.join(output_dir, f"annotated_{idx+1}.png")
+        cv2.imwrite(annotated_path, img)
+        annotated_pages.append(annotated_path)
+    output_pdf = "answer_sheet_with_marks.pdf"
+    with open(output_pdf, "wb") as f:
+        f.write(img2pdf.convert(annotated_pages))
+    return output_pdf
 # ---------- GRADIO APP ----------
+with gr.Blocks(title="LeadIB AI Grading with Optional Imprinting") as demo:
+    gr.Markdown("## LeadIB AI Grading\nUpload QP, MS, and AS. Get aligned JSON, grading report, and optionally imprint marks on the answer sheet.")
     with gr.Row():
         qp_file = gr.File(label="Upload Question Paper (PDF)", type="filepath")
         ms_file = gr.File(label="Upload Markscheme (PDF)", type="filepath")
         ans_file = gr.File(label="Upload Student Answer Sheet (PDF)", type="filepath")
+    imprint_opt = gr.Checkbox(label="Imprint Marks on Answer Sheet?", value=False)
     run_btn = gr.Button("Start Alignment + Auto-Grading")
     with gr.Row():
     with gr.Row():
         grading_out = gr.Textbox(label="✅ Grading Report (Markdown)", lines=20)
+    with gr.Row():
         grading_pdf = gr.File(label="⬇️ Download Grading Report (PDF)")
+        imprint_pdf = gr.File(label="⬇️ Download Answer Sheet with Imprinted Marks (PDF)")
     run_btn.click(
         fn=align_and_grade,
+        inputs=[qp_file, ms_file, ans_file, imprint_opt],
+        outputs=[aligned_out, grading_out, grading_pdf, imprint_pdf],
         show_progress=True
     )
 if __name__ == "__main__":
     demo.launch()