Spaces:

atz21
/

eGrade

Sleeping

App Files Files Community

atz21 committed on Aug 18, 2025

Commit

2d32051

verified ·

1 Parent(s): 305575c

Update app.py

Browse files

Files changed (1) hide show

app.py +188 -176

app.py CHANGED Viewed

@@ -1,229 +1,241 @@
 # app.py
 import os
-import gradio as gr
-import PyPDF2
 import traceback
 try:
     import google.generativeai as genai
-except Exception:
     genai = None
-# ---------- Configuration ---------------------------------------------------
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", None)
-MODEL_NAME = "gemini-2.5-pro"  # change if needed
-if genai and GEMINI_API_KEY:
-    try:
         genai.configure(api_key=GEMINI_API_KEY)
-        # instantiate model object (older SDK style)
-        model = genai.GenerativeModel(MODEL_NAME)
-    except Exception as e:
-        print("Warning: could not configure genai:", e)
-        model = None
 else:
-    model = None
-# ---------- Utilities -------------------------------------------------------
-def extract_text_from_pdf(file_obj) -> str:
-    """
-    Extract text from a PDF file-like object using PyPDF2.
-    file_obj is a file-like object (what Gradio File provides).
-    """
-    try:
-        # PyPDF2 PdfReader can read file-like objects
-        reader = PyPDF2.PdfReader(file_obj)
-        pages = []
-        for p in reader.pages:
-            text = p.extract_text()
-            if text:
-                pages.append(text)
-        return "\n\n".join(pages).strip()
-    except Exception as e:
-        # fallback: try to read raw bytes and decode (not ideal)
-        try:
-            file_obj.seek(0)
-            raw = file_obj.read()
-            # best-effort decode
-            return raw.decode(errors="ignore")
-        except Exception:
-            return f"[Error extracting text: {e}]"
-# ---------- Prompt templates ------------------------------------------------
 TRANSCRIPTION_INSTRUCTIONS = """
-You are an expert transcriber. Cleanly transcribe the student's answer sheet contained below.
-Rules:
-1. Keep section headings as Markdown headings (e.g., ## Question 1).
-2. Render any mathematical notation using LaTeX between $...$ for inline or $$...$$ for display.
-3. Preserve numbering and sub-numbering (a), (i), etc.
-4. If handwriting or characters are illegible or missing, mark them as [???] inline.
-5. Normalize spacing, remove repeated hyphens/headers from PDF conversion noise.
-6. For any short answer where student left blank, write [BLANK].
-7. Output ONLY the transcription in well-formatted Markdown with LaTeX where appropriate.
-8. Keep the transcription faithful; do not "correct" student's conceptual errors.
 """
 GRADING_INSTRUCTIONS = """
-You are an experienced examiner. Use the Question Paper (QP), the Marking Scheme (MS), and the STUDENT TRANSCRIPTION to grade the student's answers.
-Rules:
-1. Follow the MS strictly: allocate marks per the marking scheme and apply fractional marks when indicated.
-2. If the student's answer is missing or [BLANK], award 0 marks for that part unless MS instructs otherwise.
-3. When partial credit applies, explain what was missing and why partial marks were given.
-4. If the student copied the question or gave an irrelevant answer, award 0 and add a brief reason.
-5. Use negative marking only if MS explicitly instructs it.
-6. Output the grading result as a JSON object ONLY (no extra commentary) with the following structure:
-{
-  "total_marks": <int>,
-  "marks_obtained": <int>,
-  "percentage": <float>,
-  "per_question": {
-      "Q1": {"max_marks": <int>, "awarded": <int>, "notes": "<string>"},
-      "Q2": {...}
-  },
-  "high_level_feedback": "<short summary feedback to student (1-3 sentences)>"
-}
-Make sure numeric fields are numeric (not strings). Use plain JSON (no markdown fences).
 """
-# ---------- Model functions -------------------------------------------------
-def call_gemini(prompt: str, system: str = None, max_tokens: int = 1024):
     """
-    Call the Gemini model (if configured). Returns model text.
-    If model not available, raise or return an error string.
     """
-    if model is None:
-        raise RuntimeError("Gemini model is not configured. Set GEMINI_API_KEY and install google-generativeai.")
-    # generate_content expects a string prompt (or list). We'll call synchronously.
     try:
-        # Compose contents: system instruction optionally and prompt
-        contents = []
-        if system:
-            contents.append(system)
-        contents.append(prompt)
-        resp = model.generate_content(contents)
-        # Many SDK responses have .text attribute
-        text = getattr(resp, "text", None)
-        if text is None:
-            # try to string-concat chunks or .content
-            text = str(resp)
-        return text
     except Exception as e:
-        # bubble up a helpful message
-        raise RuntimeError(f"Error calling Gemini: {e}\n{traceback.format_exc()}")
-# ---------- Gradio app functions -------------------------------------------
-def transcribe_step(question_pdf, scheme_pdf, answer_pdf):
     """
-    Extract text and run transcription prompt. Returns transcription text and a state dict.
     """
-    # check files present
-    if not (question_pdf and scheme_pdf and answer_pdf):
-        return "Please upload all three PDFs (Question Paper, Marking Scheme, Answer Sheet).", None
-    # read file-like objects (gradio provides TemporaryFile-like objects)
     try:
-        question_pdf.file.seek(0)
-        q_text = extract_text_from_pdf(question_pdf.file)
     except Exception as e:
-        q_text = f"[Error reading Question Paper PDF: {e}]"
     try:
-        scheme_pdf.file.seek(0)
-        ms_text = extract_text_from_pdf(scheme_pdf.file)
     except Exception as e:
-        ms_text = f"[Error reading Marking Scheme PDF: {e}]"
     try:
-        answer_pdf.file.seek(0)
-        ans_text = extract_text_from_pdf(answer_pdf.file)
     except Exception as e:
-        ans_text = f"[Error reading Answer Sheet PDF: {e}]"
-    # If model is available, run transcription prompt; else return extracted raw text
-    if model:
-        transcription_prompt = TRANSCRIPTION_INSTRUCTIONS + "\n\n" + "ANSWER SHEET CONTENT (begin):\n" + ans_text + "\n\n(END of answer sheet)"
-        try:
-            transcription = call_gemini(transcription_prompt, system="You are a precise transcription assistant.", max_tokens=2000)
-        except Exception as e:
-            transcription = f"[Gemini transcription failed: {e}]\n\nFalling back to raw extracted text:\n\n" + ans_text
-    else:
-        transcription = "[Gemini not configured — showing best-effort extracted text]\n\n" + ans_text
-    # state to carry forward
-    state = {
-        "q_text": q_text,
-        "ms_text": ms_text,
-        "ans_text": ans_text,
-        "transcription": transcription
-    }
-    return transcription, state
-def grade_step(state):
     """
-    Use the state produced by transcribe_step to call grading prompt.
     """
-    if state is None:
-        return "No transcription state found. Run the Transcribe step first."
-    q_text = state.get("q_text", "")
-    ms_text = state.get("ms_text", "")
-    transcription = state.get("transcription", "")
-    if model:
         grading_prompt = (
-            GRADING_INSTRUCTIONS
-            + "\n\nQUESTION PAPER (begin):\n" + q_text + "\n\nQUESTION PAPER (end)\n\n"
-            + "MARKING SCHEME (begin):\n" + ms_text + "\n\nMARKING SCHEME (end)\n\n"
-            + "STUDENT TRANSCRIPTION (begin):\n" + transcription + "\n\nSTUDENT TRANSCRIPTION (end)\n\n"
-            + "Produce the JSON grading result now."
         )
-        try:
-            grading_json = call_gemini(grading_prompt, system="You are an expert examiner and must respond only with the requested JSON.", max_tokens=2000)
-        except Exception as e:
-            grading_json = f"[Gemini grading failed: {e}]\n\n"
-    else:
-        grading_json = "[Gemini not configured — grading unavailable.]\n\nPlease set GEMINI_API_KEY to enable grading."
-    return grading_json
-# ---------- Gradio UI ------------------------------------------------------
-with gr.Blocks(title="Transcribe & Grade — Exam Papers") as demo:
-    gr.Markdown("## Upload: Question Paper, Marking Scheme, Answer Sheet (PDFs)")
     with gr.Row():
         qp_in = gr.File(label="Question Paper (PDF)", file_count="single", type="file")
         ms_in = gr.File(label="Marking Scheme (PDF)", file_count="single", type="file")
         ans_in = gr.File(label="Answer Sheet (PDF)", file_count="single", type="file")
-    trans_btn = gr.Button("Transcribe Answer Sheet")
-    transcription_out = gr.Textbox(lines=20, label="Transcription (Markdown + LaTeX)", interactive=False)
-    state_store = gr.State(value=None)
-    def _on_transcribe(qp, ms, ans, _state):
-        trans, new_state = transcribe_step(qp, ms, ans)
-        return trans, new_state
-    trans_btn.click(_on_transcribe, inputs=[qp_in, ms_in, ans_in, state_store], outputs=[transcription_out, state_store])
-    gr.Markdown("## Grading")
-    grade_btn = gr.Button("Grade from Transcription")
-    grading_out = gr.Textbox(lines=20, label="Grading Result (JSON)", interactive=False)
-    def _on_grade(_state):
-        return grade_step(_state)
-    grade_btn.click(_on_grade, inputs=[state_store], outputs=[grading_out])
-    gr.Markdown("### Notes")
-    gr.Markdown(
-        "- First click **Transcribe Answer Sheet**. Review the transcription output.\n"
-        "- Then click **Grade from Transcription** to produce the JSON grading result.\n"
-        "- If you see messages about Gemini not being configured, set `GEMINI_API_KEY` in your environment and restart the app.\n"
-        "- Adjust `MODEL_NAME` at the top of this file if you want a different Gemini model."
-    )
-# ---------- Run -----------------------------------------------------------
 if __name__ == "__main__":
-    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)

 # app.py
+# Gradio app for transcription + grading using Google Gemini
+# Author: generated for your notebook logic (adapted and sanitized)
 import os
+import tempfile
+import io
 import traceback
+import gradio as gr
 try:
     import google.generativeai as genai
+except Exception as e:
     genai = None
+# ---- Configuration ----
+# IMPORTANT: Do NOT hardcode your API key here.
+# Set environment variable GEMINI_API_KEY in Hugging Face Spaces Secrets.
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", None)
+if GEMINI_API_KEY:
+    if genai is not None:
         genai.configure(api_key=GEMINI_API_KEY)
 else:
+    # No API key is set (genai may also be None if the package is not installed); the Gradio UI will show an error if the user tries to run
+    pass
+# ---- Long instructions copied-from-notebook (transcription) ----
 TRANSCRIPTION_INSTRUCTIONS = """
+Persona:
+You are an expert transcriptionist specializing in scientific and mathematical documents. Your primary goal is to convert handwritten mathematical work into a perfectly formatted, machine-readable Markdown document using LaTeX for all mathematical notation.
+Core Task:
+Your task is to transcribe the provided handwritten student solutions into a single, clean Markdown string.
+Key Directives & Rules:
+Absolute Fidelity: Transcribe exactly what is written. Do NOT correct mathematical errors, logical fallacies, or spelling mistakes. Your role is purely that of a scribe, not a grader or editor.
+LaTeX for All Math: All mathematical content—including single variables, numbers in equations, fractions, exponents, roots, and symbols—must be enclosed in LaTeX delimiters. Use inline $ ... $ for math within text and block $$ ... $$ for standalone equations.
+Handle Strikethroughs: Completely ignore and omit any text, numbers, or expressions that have been struck through by the student. Do not include them in the final output.
+Preserve Structure:
+Use Markdown bolding (e.g., **1.**, **2a.**) to clearly separate each question or sub-part.
+Maintain the vertical, step-by-step flow of the student's derivations. For multi-line aligned equations, use the \\begin{align*} ... \\end{align*} environment within a $$ ... $$ block.
+Handle Ambiguity: If a character or symbol is genuinely illegible or ambiguous, make your best interpretation and enclose it in square brackets. For example, if a variable could be u or v, write [u?].
+Output Format:
+The final output must be a single Markdown string.
+Ensure all LaTeX renders correctly and the structure is clean and readable.
 """
+# ---- Grading system instructions (as in notebook) ----
 GRADING_INSTRUCTIONS = """
+Instructions to Examiners:
+Abbreviations:
+- M: Marks for correct Method.
+- A: Marks for Answer or Accuracy (often depends on preceding M mark).
+- R: Marks for clear Reasoning.
+- AG: Answer given in the question; no marks awarded.
+- FT: Follow Through; award marks for correct method/answer using incorrect earlier results.
+Marking Rules:
+1. Always follow the markscheme annotations (M1, A2, etc.).
+2. M marks must be earned before dependent A marks are awarded (no M0 followed by A1 unless explicitly allowed).
+3. If M and A marks are on the same line (e.g., M1A1), M is for the method attempt, A is for correct values.
+4. Multiple A marks on the same line are awarded independently unless otherwise noted.
+5. Do not split M2, A3, etc. unless instructed.
+6. "Show that" responses do not need to restate the AG line unless noted.
+7. Once a correct answer is seen, ignore further incorrect working unless it affects a later part (then apply FT as appropriate).
+8. Do not award the final A mark if an incorrect approximation is used in the same part.
+Error Avoidance:
+- No incorrect mark allocation: Do not award marks unless they are explicitly justified by the markscheme.
+- No misclassification of errors: Distinguish correctly between "Conceptual Errors" and "Silly Mistakes."
+- Follow markscheme logic exactly: Especially regarding when to withhold accuracy marks if method marks are not earned.
 """
+# ---- Helper functions ----
+def ensure_genai_available():
+    if genai is None:
+        raise RuntimeError("google-generativeai package is not available. Make sure it's in requirements.txt.")
+    if not GEMINI_API_KEY:
+        raise RuntimeError("GEMINI_API_KEY not set. Set it in environment/secrets before running the app.")
+def _save_temp_file(uploaded_file) -> str:
     """
+    uploaded_file is either a path string or a file-like object provided by Gradio.
+    Returns a path to a saved temp file we can pass to genai.upload_file.
     """
+    if uploaded_file is None:
+        raise ValueError("No file provided.")
+    # Gradio gives a dict with 'name' and 'data' in some modes; but usually it's a path
+    # Attempt to handle multiple types robustly
+    if isinstance(uploaded_file, str):
+        return uploaded_file  # already a path
+    # Otherwise write bytes to a temp file
+    data = None
     try:
+        # uploaded_file may be a file-like with .read()
+        data = uploaded_file.read()
+    except Exception:
+        # uploaded_file may be a tuple returned by gr.File: (name, data)
+        try:
+            data = uploaded_file[0].read()
+        except Exception:
+            raise
+    fd, path = tempfile.mkstemp(suffix=".pdf")
+    os.close(fd)
+    with open(path, "wb") as f:
+        f.write(data)
+    return path
+def upload_file_to_gemini(local_path, display_name="file"):
+    """
+    Upload the file at local_path to Gemini using genai.upload_file and return the resulting file object.
+    """
+    ensure_genai_available()
+    # The API used in original notebook: genai.upload_file(path=...)
+    # We'll use the same call and return the object
+    try:
+        file_obj = genai.upload_file(path=local_path, display_name=display_name)
+        return file_obj
     except Exception as e:
+        # Surface the error
+        raise RuntimeError(f"Failed to upload file to Gemini: {e}")
+def call_gemini_generate(inputs_list):
     """
+    Call Gemini generative model with the provided inputs list (strings and/or uploaded file objects).
+    Returns the textual content (tries several extraction methods).
     """
+    ensure_genai_available()
     try:
+        model = genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
+        response = model.generate_content(inputs_list)
+        text = getattr(response, "text", None)
+        if not text:
+            # try legacy path
+            if hasattr(response, "candidates") and response.candidates:
+                # drill into candidates
+                try:
+                    text = response.candidates[0].content.parts[0].text
+                except Exception:
+                    text = str(response.candidates[0])
+        if not text:
+            text = str(response)
+        return text
     except Exception as e:
+        raise RuntimeError(f"Gemini generation failed: {e}")
+# ---- Core operations ----
+def transcribe_answer_sheet(answersheet_file):
+    """
+    Save the uploaded answersheet, upload to Gemini, and request transcription.
+    Returns the transcription string.
+    """
     try:
+        ensure_genai_available()
     except Exception as e:
+        return f"ERROR: {e}"
     try:
+        local_ans_path = _save_temp_file(answersheet_file)
+        uploaded_ans = upload_file_to_gemini(local_ans_path, display_name="Answer Sheet")
+        # Call Gemini to transcribe (instructions + uploaded file)
+        response_text = call_gemini_generate([TRANSCRIPTION_INSTRUCTIONS, uploaded_ans])
+        return response_text
     except Exception as e:
+        tb = traceback.format_exc()
+        return f"Transcription failed: {e}\n\n{tb}"
+def grade_answer(qp_file, ms_file, transcription_text):
     """
+    Upload QP and MS, then call Gemini with grading instructions + the transcription to obtain grading output.
     """
+    try:
+        ensure_genai_available()
+    except Exception as e:
+        return f"ERROR: {e}"
+    if transcription_text is None or transcription_text.strip() == "":
+        return "ERROR: Empty transcription. Please run transcription first or provide transcription text."
+    try:
+        local_qp = _save_temp_file(qp_file)
+        local_ms = _save_temp_file(ms_file)
+        uploaded_qp = upload_file_to_gemini(local_qp, display_name="Question Paper")
+        uploaded_ms = upload_file_to_gemini(local_ms, display_name="Marking Scheme")
+        # Build the prompt combining grading instructions + strict rules (as in the notebook)
         grading_prompt = (
+            "You are an official examiner. Use the following grading system and rules to assess the answers:\n\n"
+            + GRADING_INSTRUCTIONS
+            + "\n\nYour output must:\n"
+            "1. Apply marks exactly as per the markscheme.\n"
+            "2. Justify each awarded or withheld mark with reference to the grading rules.\n"
+            "3. Identify and classify all errors accurately (Conceptual Error, Silly Mistake, or None).\n"
+            "4. Follow the dependency between M and A marks strictly.\n"
+            "5. Avoid giving marks that the markscheme does not allow.\n"
+            "6. Provide a step-by-step reasoning for each mark awarded or withheld, explaining your thought process clearly.\n"
         )
+        response_text = call_gemini_generate([grading_prompt, uploaded_qp, uploaded_ms, transcription_text])
+        return response_text
+    except Exception as e:
+        tb = traceback.format_exc()
+        return f"Grading failed: {e}\n\n{tb}"
+# ---- Gradio UI ----
+with gr.Blocks(title="Exam Transcription & Grading (Gemini)") as demo:
+    gr.Markdown(
+        """
+        # Exam Transcription & Grading
+        Upload three PDFs: Question Paper, Marking Scheme, and Answer Sheet.
+        Click **Transcribe** to get a LaTeX-friendly Markdown transcription of the student's handwritten answers.
+        Click **Grade** to apply the marking scheme to the transcription and get a detailed grading justification.
+        **Important:** set `GEMINI_API_KEY` in environment/secrets before using.
+        """
+    )
     with gr.Row():
         qp_in = gr.File(label="Question Paper (PDF)", file_count="single", type="file")
         ms_in = gr.File(label="Marking Scheme (PDF)", file_count="single", type="file")
         ans_in = gr.File(label="Answer Sheet (PDF)", file_count="single", type="file")
+    with gr.Row():
+        transcribe_btn = gr.Button("Transcribe Answer Sheet")
+        grade_btn = gr.Button("Grade (use existing transcription)")
+    transcription_out = gr.Textbox(label="Transcription (Markdown + LaTeX)", lines=20)
+    grading_out = gr.Textbox(label="Grading Result + Justification", lines=20)
+    # Wire buttons
+    transcribe_btn.click(fn=transcribe_answer_sheet, inputs=[ans_in], outputs=[transcription_out])
+    # Grade uses QP, MS and transcription textbox as inputs
+    grade_btn.click(fn=grade_answer, inputs=[qp_in, ms_in, transcription_out], outputs=[grading_out])
+    # Provide quick example text area for transcription override (optional)
+    gr.Markdown("If you already have a prepared transcription (or want to edit before grading), paste it below and click Grade.")
+    transcription_manual = gr.Textbox(label="Optional: Edit/Provide Transcription (overrides auto)", lines=8)
+    grade_with_manual_btn = gr.Button("Grade Using Provided Transcription")
+    grade_with_manual_btn.click(fn=grade_answer, inputs=[qp_in, ms_in, transcription_manual], outputs=[grading_out])
+    gr.Markdown("⚠️ Note: This app depends on Google Gemini `google-generativeai` SDK and a valid `GEMINI_API_KEY` environment variable.")
 if __name__ == "__main__":
+    demo.launch()