Spaces:

atz21
/

leadib

Sleeping

App Files Files Community

atz21 committed on Aug 24, 2025

Commit

02d8496

verified ·

1 Parent(s): 4350cad

markdown

Browse files

Files changed (1) hide show

app.py +53 -31

app.py CHANGED Viewed

@@ -1,34 +1,39 @@
 import os
 import gradio as gr
 import google.generativeai as genai
-from reportlab.platypus import SimpleDocTemplate, Paragraph
-from reportlab.lib.styles import getSampleStyleSheet
-from reportlab.lib.pagesizes import A4
 # -------------------- CONFIG --------------------
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 # ---------- PROMPTS ----------
-TRANSCRIPTION_PROMPT = """ Your Role: You are an expert technical transcriber specializing in mathematical and scientific documents. Your mission is to convert handwritten solutions from a provided image or PDF into a clean, accurate, and logically structured Markdown format.
 Primary Objective: Preserve the author's intended solution path while filtering out all mistakes, corrections, and extraneous marks. The final output must be perfectly formatted and easy to follow.
 Core Instructions:
 Hierarchical Structure:
-Identify all questions and subquestions based on their numbering (e.g., 1. a), ### i)).
-Use ## for main questions (e.g., ## Question 1).
-Use ### for subquestions (e.g., ### a), ### i)).
-If a question number appears out of its logical sequence, transcribe it with the label provided in the source.
 What to Exclude (Content Filtration):
-Mistakes: Completely ignore and do not transcribe any number, variable, or expression that has been struck through, scribbled over, or crossed out. Transcribe only the corrected, final version.
-Extraneous Marks: Do not include any doodles, underlines (unless part of a fraction), or stray marks not relevant to the solution.
 Crucial Distinction: Cancellations vs. Step Cuts:
-Term Cancellation: This is a valid mathematical step where terms cancel each other out (e.g., +2x and -2x, or a term divided by itself).
-Action: Transcribe the step where the cancellation occurs. Immediately after that line, add a concise, bracketed note explaining what was cancelled.
-Step Cut: This is when the author skips intermediate algebraic or arithmetic steps (e.g., jumping from 2b = 2 directly to b = 1).
-Action: Transcribe the steps exactly as they appear. Do not invent or add the missing steps. The logical jump in the transcribed output serves to represent the step cut.
 Formatting and Special Cases:
-Equations: Enclose all mathematical equations and multi-line calculations in Markdown code blocks for clarity and proper rendering.
-Illegibility: If a specific word or number is impossible to read, use the placeholder [illegible].
-Graphs: Do not attempt to recreate graphs. Instead, describe them textually. Note the type of curve (e.g., parabola, polynomial) and list any labeled key points like intercepts, vertices, or asymptotes."""
 GRADING_PROMPT = """Instructions to Examiners
 Abbreviations:
@@ -108,16 +113,13 @@ Implied marks appear in brackets, e.g. (M1), and can only be awarded if correct
 - More than one solution: mark only the first response unless candidate specifies otherwise.
 """
-# ---------- HELPER: Save to PDF ----------
 def save_as_pdf(text, filename="output.pdf"):
-    styles = getSampleStyleSheet()
-    doc = SimpleDocTemplate(filename, pagesize=A4)
-    story = [Paragraph(p, styles["Normal"]) for p in text.split("\n")]
-    doc.build(story)
     return filename
 # ---------- STEP 1: TRANSCRIPTION ----------
 def transcribe(ans_file):
     try:
@@ -135,7 +137,6 @@ def transcribe(ans_file):
     except Exception as e:
         return f"❌ Error during transcription: {e}", None
 # ---------- STEP 2: GRADING ----------
 def grade(qp_file, ms_file, transcription):
     try:
@@ -143,18 +144,39 @@ def grade(qp_file, ms_file, transcription):
         ms_uploaded = genai.upload_file(path=ms_file, display_name="Marking Scheme")
         model = genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
-        resp = model.generate_content([GRADING_PROMPT, qp_uploaded, ms_uploaded, transcription])
-        grading = getattr(resp, "text", None)
-        if not grading and resp.candidates:
-            grading = resp.candidates[0].content.parts[0].text
         pdf_path = save_as_pdf(grading, "grading.pdf")
         return grading, pdf_path
     except Exception as e:
         return f"❌ Error during grading: {e}", None
 # ---------- GRADIO APP ----------
 with gr.Blocks(title="LeadIB AI Grading") as demo:
     gr.Markdown("## LeadIB AI Grading\nUpload exam documents to transcribe and grade student answers step by step.")

 import os
 import gradio as gr
 import google.generativeai as genai
+from markdown_pdf import MarkdownPdf, Section
 # -------------------- CONFIG --------------------
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 # ---------- PROMPTS ----------
+TRANSCRIPTION_PROMPT = """Your Role: You are an expert technical transcriber specializing in mathematical and scientific documents. Your mission is to convert handwritten solutions from a provided image or PDF into a clean, accurate, and logically structured Markdown format.
 Primary Objective: Preserve the author's intended solution path while filtering out all mistakes, corrections, and extraneous marks. The final output must be perfectly formatted and easy to follow.
 Core Instructions:
 Hierarchical Structure:
+- Identify all questions and subquestions based on their numbering (e.g., 1. a), i)).
+- Use ## for main questions (e.g., ## Question 1).
+- Use ### for subquestions (e.g., ### a), ### i)).
+- If a question number appears out of its logical sequence, transcribe it with the label provided in the source.
 What to Exclude (Content Filtration):
+- Mistakes: Completely ignore and do not transcribe any number, variable, or expression that has been struck through, scribbled over, or crossed out. Transcribe only the corrected, final version.
+- Extraneous Marks: Do not include any doodles, underlines (unless part of a fraction), or stray marks not relevant to the solution.
 Crucial Distinction: Cancellations vs. Step Cuts:
+- Term Cancellation: This is a valid mathematical step where terms cancel each other out (e.g., +2x and -2x, or a term divided by itself).
+  Action: Transcribe the step where the cancellation occurs. Immediately after that line, add a concise, bracketed note explaining what was cancelled.
+- Step Cut: This is when the author skips intermediate algebraic or arithmetic steps (e.g., jumping from 2b = 2 directly to b = 1).
+  Action: Transcribe the steps exactly as they appear. Do not invent or add the missing steps. The logical jump in the transcribed output serves to represent the step cut.
 Formatting and Special Cases:
+- Equations: Enclose all mathematical equations and multi-line calculations in Markdown code blocks for clarity and proper rendering.
+- Illegibility: If a specific word or number is impossible to read, use the placeholder [illegible].
+- Graphs: Do not attempt to recreate graphs. Instead, describe them textually. Note the type of curve (e.g., parabola, polynomial) and list any labeled key points like intercepts, vertices, or asymptotes.
+"""
+# Full 9-rule grading prompt (point 9 is Presentation; "Calculators" section removed)
 GRADING_PROMPT = """Instructions to Examiners
 Abbreviations:
 - More than one solution: mark only the first response unless candidate specifies otherwise.
 """
+# ---------- HELPER: Save to PDF using markdown-pdf ----------
 def save_as_pdf(text, filename="output.pdf"):
+    # Build a PDF from the Markdown string `text` using markdown-pdf, write it
+    # to `filename`, and return that same path so the caller can hand it on.
+    # NOTE(review): `text` is passed straight to Section; grade() can pass
+    # None here when the model response has no text — confirm Section copes.
+    pdf = MarkdownPdf()
+    # The whole document is added as one section; toc=False presumably keeps
+    # it out of the table of contents — confirm against markdown-pdf docs.
+    pdf.add_section(Section(text, toc=False))
+    pdf.save(filename)
     return filename
 # ---------- STEP 1: TRANSCRIPTION ----------
 def transcribe(ans_file):
     try:
     except Exception as e:
         return f"❌ Error during transcription: {e}", None
 # ---------- STEP 2: GRADING ----------
 def grade(qp_file, ms_file, transcription):
     try:
         ms_uploaded = genai.upload_file(path=ms_file, display_name="Marking Scheme")
         model = genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
+        # Prompt that embeds the full 9 rules and enforces a structured grading table
+        structured_instructions = (
+            "You are an official examiner. Use the following grading rules strictly:\n\n"
+            f"{GRADING_PROMPT}\n\n"
+            "OUTPUT FORMAT (use GitHub-flavored Markdown table):\n\n"
+            "| Student wrote | Marks Awarded | Reason (reference the rules; specify error type: A : All Good , B : Silly Mistake , C : Conceptual Error , D : Hard question ,  E : Not Applicable) |\n"
+            "|---|---|---|\n"
+            "Then, after the table, provide a short 'Summary & Final Mark' section with totals and any FT usage noted.\n\n"
+            "Guidelines:\n"
+            "1) Apply marks exactly as per the markscheme.\n"
+            "2) Justify each awarded or withheld mark with explicit references to the numbered rules.\n"
+            "3) Classify all errors (Conceptual Error, Silly Mistake, Misread, or None).\n"
+            "4) Enforce dependency between M and A marks (no A awarded if M not earned) and indicate FT when applied.\n"
+            "5) Do not invent marks that are not present in the markscheme.\n"
+            "6) Provide step-by-step reasoning for each mark awarded or withheld.\n"
+        )
+        response = model.generate_content([
+            structured_instructions,
+            qp_uploaded,      # uploaded question paper
+            ms_uploaded,      # uploaded marking scheme
+            transcription     # student's transcription
+        ])
+        grading = getattr(response, "text", None)
+        if not grading and response.candidates:
+            grading = response.candidates[0].content.parts[0].text
         pdf_path = save_as_pdf(grading, "grading.pdf")
         return grading, pdf_path
     except Exception as e:
         return f"❌ Error during grading: {e}", None
 # ---------- GRADIO APP ----------
 with gr.Blocks(title="LeadIB AI Grading") as demo:
     gr.Markdown("## LeadIB AI Grading\nUpload exam documents to transcribe and grade student answers step by step.")