gradeai

Sleeping

App Files Community

atz21 committed on Aug 25, 2025

Commit

1674b1c

verified ·

1 Parent(s): eea77c8

3 step process

Browse files

Files changed (1) hide show

app.py +156 -126

app.py CHANGED Viewed

@@ -1,140 +1,158 @@
 import os
 import gradio as gr
 import google.generativeai as genai
-from reportlab.platypus import SimpleDocTemplate, Paragraph
-from reportlab.lib.styles import getSampleStyleSheet
-from reportlab.lib.pagesizes import A4
 # -------------------- CONFIG --------------------
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 # ---------- PROMPTS ----------
-TRANSCRIPTION_PROMPT = """Your Role: You are an expert technical transcriber specializing in mathematical and scientific documents. Your mission is to convert handwritten solutions from a provided image or PDF into a clean, accurate, and logically structured Markdown format.
-Primary Objective: Preserve the author's intended solution path while filtering out all mistakes, corrections, and extraneous marks. The final output must be perfectly formatted and easy to follow.
-Core Instructions:
-Hierarchical Structure:
-Identify all questions and subquestions based on their numbering (e.g., 1. a), ### i)).
-Use ## for main questions (e.g., ## Question 1).
-Use ### for subquestions (e.g., ### a), ### i)).
-If a question number appears out of its logical sequence, transcribe it with the label provided in the source.
-What to Exclude (Content Filtration):
-Mistakes: Completely ignore and do not transcribe any number, variable, or expression that has been struck through, scribbled over, or crossed out. Transcribe only the corrected, final version.
-Extraneous Marks: Do not include any doodles, underlines (unless part of a fraction), or stray marks not relevant to the solution.
-Crucial Distinction: Cancellations vs. Step Cuts:
-Term Cancellation: This is a valid mathematical step where terms cancel each other out (e.g., +2x and -2x, or a term divided by itself).
-Action: Transcribe the step where the cancellation occurs. Immediately after that line, add a concise, bracketed note explaining what was cancelled.
-Step Cut: This is when the author skips intermediate algebraic or arithmetic steps (e.g., jumping from 2b = 2 directly to b = 1).
-Action: Transcribe the steps exactly as they appear. Do not invent or add the missing steps. The logical jump in the transcribed output serves to represent the step cut.
-Formatting and Special Cases:
-Equations: Enclose all mathematical equations and multi-line calculations in Markdown code blocks for clarity and proper rendering.
-Illegibility: If a specific word or number is impossible to read, use the placeholder [illegible].
-Graphs: Do not attempt to recreate graphs. Instead, describe them textually. Note the type of curve (e.g., parabola, polynomial) and list any labeled key points like intercepts, vertices, or asymptotes."""
-GRADING_PROMPT = """Instructions to Examiners:
-Abbreviations:
-- M: Marks for correct Method.
-- A: Marks for Answer or Accuracy (often depends on preceding M mark).
-- R: Marks for clear Reasoning.
-- AG: Answer given in the question; no marks awarded.
-- FT: Follow Through; award marks for correct method/answer using incorrect earlier results.
-Marking Rules:
-1. Always follow the markscheme annotations (M1, A2, etc.).
-2. M marks must be earned before dependent A marks are awarded (no M0 followed by A1 unless explicitly allowed).
-3. If M and A marks are on the same line (e.g., M1A1), M is for the method attempt, A is for correct values.
-4. Multiple A marks on the same line are awarded independently unless otherwise noted.
-5. Do not split M2, A3, etc. unless instructed.
-6. "Show that" responses do not need to restate the AG line unless noted.
-7. Once a correct answer is seen, ignore further incorrect working unless it affects a later part (then apply FT as appropriate).
-8. Do not award the final A mark if an incorrect approximation is used in the same part.
-Error Avoidance:
-- **No incorrect mark allocation:** Do not award marks unless they are explicitly justified by the markscheme.
-- **No misclassification of errors:** Distinguish correctly between "Conceptual Errors" and "Silly Mistakes."
-- **Follow markscheme logic exactly:** Especially regarding when to withhold accuracy marks if method marks are not earned."""
 # ---------- HELPER: Save to PDF ----------
 def save_as_pdf(text, filename="output.pdf"):
-    styles = getSampleStyleSheet()
-    doc = SimpleDocTemplate(filename, pagesize=A4)
-    story = [Paragraph(p, styles["Normal"]) for p in text.split("\n")]
-    doc.build(story)
     return filename
-# ---------- HELPER: Safe Generate with Retry ----------
-def safe_generate(model, inputs, fallback_prompt=None):
-    try:
-        # Try normal generate
-        resp = model.generate_content(inputs)
-        cand = resp.candidates[0] if resp.candidates else None
-        if cand and cand.content and cand.content.parts:
-            return resp.text, None
-        reason = getattr(cand, "finish_reason", "None")
-        if reason == "1":  # SAFETY block
-            # Retry with streaming
-            chunks = []
-            stream_resp = model.generate_content(inputs, stream=True)
-            for chunk in stream_resp:
-                if chunk.candidates and chunk.candidates[0].content.parts:
-                    chunks.append(chunk.text)
-            if chunks:
-                return "".join(chunks), None
-            # Retry with simplified prompt if provided
-            if fallback_prompt:
-                retry_resp = model.generate_content([fallback_prompt] + inputs[1:], stream=True)
-                chunks = []
-                for chunk in retry_resp:
-                    if chunk.candidates and chunk.candidates[0].content.parts:
-                        chunks.append(chunk.text)
-                if chunks:
-                    return "".join(chunks), None
-        return None, f"❌ Empty/blocked response. finish_reason={reason}, safety_ratings={getattr(cand, 'safety_ratings', None)}"
-    except Exception as e:
-        return None, f"❌ Exception: {e}"
-# ---------- COMMON SAFETY SETTINGS ----------
-safety_settings = [
-    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
-    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
-    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
-    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
-]
-# ---------- STEP 1: TRANSCRIPTION ----------
-def transcribe(ans_file):
     try:
         ans_uploaded = genai.upload_file(path=ans_file, display_name="Answer Sheet")
-        model = genai.GenerativeModel(
-            "gemini-2.5-pro",
-            generation_config={"temperature": 0},
-            safety_settings=safety_settings
-        )
-        transcription, error = safe_generate(model, [TRANSCRIPTION_PROMPT, ans_uploaded], fallback_prompt="Convert the PDF into structured plain text with questions separated.")
-        if error:
-            return error, None
-        pdf_path = save_as_pdf(transcription, "transcription.pdf")
         return transcription, pdf_path
     except Exception as e:
         return f"❌ Error during transcription: {e}", None
-# ---------- STEP 2: GRADING ----------
-def grade(qp_file, ms_file, transcription):
     try:
         qp_uploaded = genai.upload_file(path=qp_file, display_name="Question Paper")
-        ms_uploaded = genai.upload_file(path=ms_file, display_name="Marking Scheme")
-        model = genai.GenerativeModel(
-            "gemini-2.5-pro",
-            generation_config={"temperature": 0},
-            safety_settings=safety_settings
-        )
-        grading, error = safe_generate(model, [GRADING_PROMPT, qp_uploaded, ms_uploaded, transcription], fallback_prompt="Grade the answers according to the marking scheme. Show marks step by step.")
-        if error:
-            return error, None
         pdf_path = save_as_pdf(grading, "grading.pdf")
         return grading, pdf_path
@@ -142,36 +160,48 @@ def grade(qp_file, ms_file, transcription):
         return f"❌ Error during grading: {e}", None
 # ---------- GRADIO APP ----------
-with gr.Blocks(title="LeadIB AI Grading") as demo:
-    gr.Markdown("## LeadIB AI Grading\nUpload exam documents to transcribe and grade student answers step by step.")
     with gr.Row():
         qp_file = gr.File(label="Upload Question Paper (PDF)", type="filepath")
         ms_file = gr.File(label="Upload Mark Scheme (PDF)", type="filepath")
         ans_file = gr.File(label="Upload Student Answer Sheet (PDF)", type="filepath")
-    # Step 1: Transcription
-    transcribe_btn = gr.Button("Step 1: Transcribe Answer Sheet")
     with gr.Row():
-        transcription_out = gr.Textbox(label="📄 Student Transcription", lines=20)
-        transcription_pdf = gr.File(label="⬇️ Download Transcription (PDF)")
-    # Step 2: Grading
-    grade_btn = gr.Button("Step 2: Grade the Student")
     with gr.Row():
         grading_out = gr.Textbox(label="✅ Grading Report (Step-by-Step)", lines=20)
-        grading_pdf = gr.File(label="⬇️ Download Grading (PDF)")
     # Button Logic
-    transcribe_btn.click(
-        fn=transcribe,
         inputs=[ans_file],
-        outputs=[transcription_out, transcription_pdf],
         show_progress=True
     )
     grade_btn.click(
         fn=grade,
-        inputs=[qp_file, ms_file, transcription_out],
         outputs=[grading_out, grading_pdf],
         show_progress=True
     )

 import os
 import gradio as gr
 import google.generativeai as genai
+from markdown_pdf import MarkdownPdf, Section
 # -------------------- CONFIG --------------------
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 # ---------- PROMPTS ----------
+# Student transcription prompt
+TRANSCRIPTION_PROMPT = """Your Role: You are an expert technical transcriber specializing in mathematical and scientific documents.
+Your mission is to convert handwritten solutions from a provided image or PDF into a clean, accurate, and logically structured Markdown format.
+Instructions:
+- Use ## for questions, ### for subquestions.
+- Transcribe only the corrected, final version of the solution (ignore scribbles, cancellations, mistakes).
+- Keep mathematical expressions in fenced code blocks.
+- If something is illegible, use [illegible].
+- Do not recreate graphs, only describe them.
+"""
+# Markscheme transcription prompt
+MARKSCHEME_TRANSCRIPTION_PROMPT = """Your Role: You are an expert transcriber.
+Convert the official marking scheme from the provided PDF into clean, structured Markdown.
+Instructions:
+- Preserve all structure (questions, subquestions).
+- Keep M, A, R annotations exactly as written.
+- Represent alternative methods clearly (METHOD 1, METHOD 2, etc.).
+- Preserve any accuracy requirements.
+- Format in Markdown using ## and ### for hierarchy.
+- Use code blocks for equations.
+"""
+# Grading prompt with rules + red highlighting
+GRADING_PROMPT = """You are an official examiner. Use the following grading rules strictly.
+Abbreviations:
+- M: Marks awarded for attempting to use a correct Method.
+- A: Marks awarded for an Answer or for Accuracy; often dependent on preceding M marks.
+- R: Marks awarded for clear Reasoning.
+- AG: Answer given in the question and so no marks are awarded.
+- FT: Follow through. The practice of awarding marks, despite candidate errors in previous parts, for their correct methods/answers using incorrect results.
+--------------------------------------------
+## 1. General
+Award marks using the annotations as noted in the markscheme (e.g., M1, A2).
+## 2. Method and Answer/Accuracy marks
+- Do not automatically award full marks for a correct answer; all working must be checked.
+- It is generally not possible to award M0 followed by A1.
+- Where M and A marks are noted on the same line (M1A1), M is for method, A is for accuracy.
+- Multiple A marks can be independent.
+## 3. Implied marks
+Implied marks (M1) can only be awarded if correct work is seen or implied.
+## 4. Follow through (FT) marks
+- Award FT if an earlier wrong answer is used consistently later.
+- Do not award FT if the result contradicts the question (e.g., probability > 1).
+## 5. Mis-read (MR)
+- Penalize once if the candidate misreads a value.
+- Award other marks as appropriate.
+## 6. Alternative methods
+- Accept valid alternatives unless "Hence" forbids it.
+## 7. Alternative forms
+- Accept equivalent numeric/algebraic forms unless specified otherwise.
+## 8. Format and accuracy of answers
+- Use correct accuracy (3 s.f. if not specified).
+- Arithmetic and algebra should be simplified.
+## 9. Presentation of candidate work
+- Ignore crossed-out work unless indicated.
+- Mark only the first solution unless candidate specifies otherwise.
+--------------------------------------------
+### OUTPUT FORMAT
+Produce a GitHub-flavored Markdown table with 3 columns:
+| Student wrote | Marks Awarded | Reason |
+|---------------|---------------|--------|
+Special Formatting Rule:
+- Whenever a mark is lost (M0, A0, R0 etc.), wrap it in red using: `<span style="color:red">M0</span>`.
+- Keep awarded marks (M1, A1, etc.) in plain text.
+- If mixed (e.g., M1A0A1), only highlight the lost marks (`A0`).
+After the table, provide:
+### Summary & Final Mark
+- Total marks obtained vs total available
+- Any FT (follow-through) applied
+- Classification of errors (Conceptual, Silly mistake, Misread, etc.)
+"""
 # ---------- HELPER: Save to PDF ----------
 def save_as_pdf(text, filename="output.pdf"):
+    pdf = MarkdownPdf()
+    pdf.add_section(Section(text, toc=False))
+    pdf.save(filename)
     return filename
+# ---------- STEP 1: TRANSCRIBE STUDENT ----------
+def transcribe_student(ans_file):
     try:
         ans_uploaded = genai.upload_file(path=ans_file, display_name="Answer Sheet")
+        model = genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
+        resp = model.generate_content([TRANSCRIPTION_PROMPT, ans_uploaded])
+        transcription = getattr(resp, "text", None)
+        if not transcription and resp.candidates:
+            transcription = resp.candidates[0].content.parts[0].text
+        pdf_path = save_as_pdf(transcription, "student_transcription.pdf")
         return transcription, pdf_path
     except Exception as e:
         return f"❌ Error during transcription: {e}", None
+# ---------- STEP 2: TRANSCRIBE MARKSCHEME ----------
+def transcribe_ms(ms_file):
+    try:
+        ms_uploaded = genai.upload_file(path=ms_file, display_name="Markscheme")
+        model = genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
+        resp = model.generate_content([MARKSCHEME_TRANSCRIPTION_PROMPT, ms_uploaded])
+        ms_transcription = getattr(resp, "text", None)
+        if not ms_transcription and resp.candidates:
+            ms_transcription = resp.candidates[0].content.parts[0].text
+        pdf_path = save_as_pdf(ms_transcription, "ms_transcription.pdf")
+        return ms_transcription, pdf_path
+    except Exception as e:
+        return f"❌ Error during MS transcription: {e}", None
+# ---------- STEP 3: GRADING ----------
+def grade(qp_file, ms_transcription, student_transcription):
     try:
         qp_uploaded = genai.upload_file(path=qp_file, display_name="Question Paper")
+        model = genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
+        response = model.generate_content([
+            GRADING_PROMPT,
+            qp_uploaded,
+            "### Markscheme Transcription:\n" + ms_transcription,
+            "### Student Transcription:\n" + student_transcription
+        ])
+        grading = getattr(response, "text", None)
+        if not grading and response.candidates:
+            grading = response.candidates[0].content.parts[0].text
         pdf_path = save_as_pdf(grading, "grading.pdf")
         return grading, pdf_path
         return f"❌ Error during grading: {e}", None
 # ---------- GRADIO APP ----------
+with gr.Blocks(title="LeadIB AI Grading (3-Step)") as demo:
+    gr.Markdown("## LeadIB AI Grading (3-Step)\nUpload exam documents to transcribe and grade step by step.")
     with gr.Row():
         qp_file = gr.File(label="Upload Question Paper (PDF)", type="filepath")
         ms_file = gr.File(label="Upload Mark Scheme (PDF)", type="filepath")
         ans_file = gr.File(label="Upload Student Answer Sheet (PDF)", type="filepath")
+    # Step 1: Transcribe Student
+    transcribe_student_btn = gr.Button("Step 1: Transcribe Student Answer Sheet")
     with gr.Row():
+        student_out = gr.Textbox(label="📄 Student Transcription", lines=20)
+        student_pdf = gr.File(label="⬇️ Download Student Transcription (PDF)")
+    # Step 2: Transcribe Markscheme
+    transcribe_ms_btn = gr.Button("Step 2: Transcribe Markscheme")
+    with gr.Row():
+        ms_out = gr.Textbox(label="📄 Markscheme Transcription", lines=20)
+        ms_pdf = gr.File(label="⬇️ Download Markscheme Transcription (PDF)")
+    # Step 3: Grading
+    grade_btn = gr.Button("Step 3: Grade the Student")
     with gr.Row():
         grading_out = gr.Textbox(label="✅ Grading Report (Step-by-Step)", lines=20)
+        grading_pdf = gr.File(label="⬇️ Download Grading Report (PDF)")
     # Button Logic
+    transcribe_student_btn.click(
+        fn=transcribe_student,
         inputs=[ans_file],
+        outputs=[student_out, student_pdf],
+        show_progress=True
+    )
+    transcribe_ms_btn.click(
+        fn=transcribe_ms,
+        inputs=[ms_file],
+        outputs=[ms_out, ms_pdf],
         show_progress=True
     )
     grade_btn.click(
         fn=grade,
+        inputs=[qp_file, ms_out, student_out],
         outputs=[grading_out, grading_pdf],
         show_progress=True
     )