Spaces:

atz21
/

eGrade

Sleeping

App Files Files Community

atz21 committed on Aug 18, 2025

Commit

1c7ddd9

verified ·

1 Parent(s): 03fd1fb

Update app.py

Browse files

Files changed (1) hide show

app.py +218 -66

app.py CHANGED Viewed

@@ -1,77 +1,229 @@
 import gradio as gr
-import google.generativeai as genai
-# 🔑 Configure Gemini
-GEMINI_API_KEY = "YOUR_API_KEY_HERE"
-genai.configure(api_key=GEMINI_API_KEY)
-# Initialize model
-model = genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
-# ---------- STEP 1: TRANSCRIPTION ----------
-def transcribe_files(qp_file, ms_file, ans_file):
-    # Upload Answer Sheet
-    uploaded_as = genai.upload_file(path=ans_file.name, display_name="Answer Sheet")
-    transcription_instructions = """
-    Persona:
-    You are an expert transcriptionist specializing in scientific and mathematical documents.
-    Your task is to transcribe the provided handwritten student solutions into Markdown+LaTeX.
-    Follow these rules:
-    - Use LaTeX for all math ($ ... $ or $$ ... $$).
-    - Do not correct mistakes, just transcribe.
-    - Ignore strikethroughs.
-    - Use **bold** for question numbering.
-    - Preserve step-by-step derivations.
     """
-    response = model.generate_content([transcription_instructions, uploaded_as])
-    transcription = getattr(response, "text", None)
-    if not transcription and response.candidates:
-        transcription = response.candidates[0].content.parts[0].text
-    return transcription or "No transcription generated."
-# ---------- STEP 2: GRADING ----------
-def grade_files(qp_file, ms_file, ans_file, transcription):
-    # Upload QP and MS
-    uploaded_qp = genai.upload_file(path=qp_file.name, display_name="Question Paper")
-    uploaded_ms = genai.upload_file(path=ms_file.name, display_name="Marking Scheme")
-    grading_system = """
-    Instructions to Examiners:
-    - M: Method marks
-    - A: Accuracy marks
-    - FT: Follow-through rules
-    - Apply marking strictly as per scheme.
     """
-    response = model.generate_content([
-        f"You are an examiner. Grade the transcription using the rules:\n{grading_system}",
-        uploaded_qp,
-        uploaded_ms,
-        transcription
-    ])
-    grading = getattr(response, "text", None)
-    if not grading and response.candidates:
-        grading = response.candidates[0].content.parts[0].text
-    return grading or "No grading generated."
-# ---------- GRADIO UI ----------
-with gr.Blocks() as demo:
-    gr.Markdown("## 📘 Automated Transcription & Grading System")
     with gr.Row():
-        qp = gr.File(label="Upload Question Paper (PDF)")
-        ms = gr.File(label="Upload Marking Scheme (PDF)")
-        ans = gr.File(label="Upload Answer Sheet (PDF)")
-    transcribe_btn = gr.Button("🔍 Transcribe Answer Sheet")
-    transcription_output = gr.Textbox(label="Transcription", lines=20)
-    grade_btn = gr.Button("✅ Grade Answers")
-    grading_output = gr.Textbox(label="Grading Result", lines=20)
-    transcribe_btn.click(fn=transcribe_files, inputs=[qp, ms, ans], outputs=transcription_output)
-    grade_btn.click(fn=grade_files, inputs=[qp, ms, ans, transcription_output], outputs=grading_output)
-demo.launch()

+# app.py
+import os
 import gradio as gr
+import PyPDF2
+import traceback
+# Optional dependency: the app must still import when the Gemini SDK is absent.
+try:
+    import google.generativeai as genai
+except Exception:
+    genai = None
+# ---------- Configuration ---------------------------------------------------
+MODEL_NAME = "gemini-2.5-pro"  # change if needed
+GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
+# `model` stays None unless the SDK imported, an API key is set, and the client
+# could be configured and instantiated; the rest of the file checks it before use.
+model = None
+if genai and GEMINI_API_KEY:
+    try:
+        genai.configure(api_key=GEMINI_API_KEY)
+        model = genai.GenerativeModel(MODEL_NAME)
+    except Exception as exc:
+        print("Warning: could not configure genai:", exc)
+# ---------- Utilities -------------------------------------------------------
+def extract_text_from_pdf(file_obj) -> str:
+    """
+    Return the text PyPDF2 can extract from ``file_obj``.
+
+    Non-empty per-page results are joined with blank lines and the whole
+    string is stripped. If PyPDF2 raises, the object is rewound, read in full
+    and decoded best-effort (undecodable bytes are dropped). If that also
+    fails, a bracketed message describing the original failure is returned.
+    """
+    try:
+        reader = PyPDF2.PdfReader(file_obj)
+        page_texts = (page.extract_text() for page in reader.pages)
+        return "\n\n".join(chunk for chunk in page_texts if chunk).strip()
+    except Exception as primary_error:
+        # Last resort (lossy): hand back whatever the raw bytes decode to.
+        try:
+            file_obj.seek(0)
+            return file_obj.read().decode(errors="ignore")
+        except Exception:
+            return f"[Error extracting text: {primary_error}]"
+# ---------- Prompt templates ------------------------------------------------
+# Prompt prefix for the transcription call. transcribe_step appends the text
+# extracted from the answer sheet after these rules. The wording is sent to the
+# model verbatim, so edits here change model behaviour.
+TRANSCRIPTION_INSTRUCTIONS = """
+You are an expert transcriber. Cleanly transcribe the student's answer sheet contained below.
+Rules:
+1. Keep section headings as Markdown headings (e.g., ## Question 1).
+2. Render any mathematical notation using LaTeX between $...$ for inline or $$...$$ for display.
+3. Preserve numbering and sub-numbering (a), (i), etc.
+4. If handwriting or characters are illegible or missing, mark them as [???] inline.
+5. Normalize spacing, remove repeated hyphens/headers from PDF conversion noise.
+6. For any short answer where student left blank, write [BLANK].
+7. Output ONLY the transcription in well-formatted Markdown with LaTeX where appropriate.
+8. Keep the transcription faithful; do not "correct" student's conceptual errors.
+"""
+# Prompt prefix for the grading call. grade_step appends the question paper,
+# marking scheme and student transcription after these rules. The JSON skeleton
+# below is illustrative text for the model, not a schema enforced by this file.
+GRADING_INSTRUCTIONS = """
+You are an experienced examiner. Use the Question Paper (QP), the Marking Scheme (MS), and the STUDENT TRANSCRIPTION to grade the student's answers.
+Rules:
+1. Follow the MS strictly: allocate marks per the marking scheme and apply fractional marks when indicated.
+2. If the student's answer is missing or [BLANK], award 0 marks for that part unless MS instructs otherwise.
+3. When partial credit applies, explain what was missing and why partial marks were given.
+4. If the student copied the question or gave an irrelevant answer, award 0 and add a brief reason.
+5. Use negative marking only if MS explicitly instructs it.
+6. Output the grading result as a JSON object ONLY (no extra commentary) with the following structure:
+{
+  "total_marks": <int>,
+  "marks_obtained": <int>,
+  "percentage": <float>,
+  "per_question": {
+      "Q1": {"max_marks": <int>, "awarded": <int>, "notes": "<string>"},
+      "Q2": {...}
+  },
+  "high_level_feedback": "<short summary feedback to student (1-3 sentences)>"
+}
+Make sure numeric fields are numeric (not strings). Use plain JSON (no markdown fences).
+"""
+# ---------- Model functions -------------------------------------------------
+def call_gemini(prompt: str, system: str = None, max_tokens: int = 1024):
+    """
+    Send ``prompt`` to the module-level Gemini ``model`` and return its reply text.
+
+    ``system``, when truthy, is placed ahead of ``prompt`` in the list passed
+    to ``model.generate_content``. ``max_tokens`` is accepted for callers'
+    convenience but is not forwarded to the SDK by this function.
+
+    Raises RuntimeError if ``model`` is None, or if the SDK call (or reading
+    the response text) fails; in the latter case the message embeds the
+    formatted traceback of the underlying error.
+    """
+    if model is None:
+        raise RuntimeError("Gemini model is not configured. Set GEMINI_API_KEY and install google-generativeai.")
+    parts = [system, prompt] if system else [prompt]
+    try:
+        reply = model.generate_content(parts)
+        body = getattr(reply, "text", None)
+        # A response object without a ``text`` attribute is stringified as-is.
+        return str(reply) if body is None else body
+    except Exception as err:
+        raise RuntimeError(f"Error calling Gemini: {err}\n{traceback.format_exc()}")
+# ---------- Gradio app functions -------------------------------------------
+def _read_upload_text(upload, label):
+    """Return extracted text for one uploaded PDF, or a bracketed error string."""
+    try:
+        # Depending on the Gradio version and the File component's `type`, an
+        # upload arrives as a path string or as a tempfile-like wrapper whose
+        # `name` is the path of the uploaded copy. Opening that path is the
+        # reliable way to reach the bytes, and the handle is closed afterwards.
+        location = upload if isinstance(upload, (str, os.PathLike)) else getattr(upload, "name", None)
+        if isinstance(location, (str, os.PathLike)) and os.path.isfile(location):
+            with open(location, "rb") as handle:
+                return extract_text_from_pdf(handle)
+        # No usable path: fall back to the wrapped handle, or the object itself.
+        stream = getattr(upload, "file", upload)
+        stream.seek(0)
+        return extract_text_from_pdf(stream)
+    except Exception as e:
+        return f"[Error reading {label} PDF: {e}]"
+def transcribe_step(question_pdf, scheme_pdf, answer_pdf):
     """
+    Extract text from the three uploads and transcribe the answer sheet.
+
+    Each argument is one uploaded PDF as delivered by the Gradio File input
+    (a path string, or an object exposing ``name`` and/or ``file``).
+
+    Returns ``(transcription, state)``. ``state`` is a dict with the keys
+    ``q_text``, ``ms_text``, ``ans_text`` and ``transcription``, meant to be
+    handed to the grading step. If any upload is missing, a message asking for
+    all three PDFs is returned together with ``None``.
+
+    Extraction and Gemini failures are not raised; they are reported inline as
+    bracketed text in the returned strings.
+    """
+    if not (question_pdf and scheme_pdf and answer_pdf):
+        return "Please upload all three PDFs (Question Paper, Marking Scheme, Answer Sheet).", None
+    q_text = _read_upload_text(question_pdf, "Question Paper")
+    ms_text = _read_upload_text(scheme_pdf, "Marking Scheme")
+    ans_text = _read_upload_text(answer_pdf, "Answer Sheet")
+    # With a configured model, ask it to clean up the extracted text; otherwise
+    # (or if the call fails) surface the raw extraction so the user still sees it.
+    if model:
+        transcription_prompt = TRANSCRIPTION_INSTRUCTIONS + "\n\n" + "ANSWER SHEET CONTENT (begin):\n" + ans_text + "\n\n(END of answer sheet)"
+        try:
+            transcription = call_gemini(transcription_prompt, system="You are a precise transcription assistant.", max_tokens=2000)
+        except Exception as e:
+            transcription = f"[Gemini transcription failed: {e}]\n\nFalling back to raw extracted text:\n\n" + ans_text
+    else:
+        transcription = "[Gemini not configured — showing best-effort extracted text]\n\n" + ans_text
+    # Everything the grading step needs, so the PDFs are not re-read later.
+    state = {
+        "q_text": q_text,
+        "ms_text": ms_text,
+        "ans_text": ans_text,
+        "transcription": transcription,
+    }
+    return transcription, state
+def grade_step(state):
+    """
+    Grade the transcription carried in ``state`` and return the model's reply.
+
+    ``state`` is the dict produced by ``transcribe_step``; ``None`` yields a
+    reminder to run the transcription first. The reply is returned as text:
+    the prompt asks for JSON, but nothing is parsed or validated here. Without
+    a configured model, or when the Gemini call fails, a bracketed explanatory
+    message is returned instead.
     """
+    if state is None:
+        return "No transcription state found. Run the Transcribe step first."
+    question_text = state.get("q_text", "")
+    scheme_text = state.get("ms_text", "")
+    student_text = state.get("transcription", "")
+    if not model:
+        return "[Gemini not configured — grading unavailable.]\n\nPlease set GEMINI_API_KEY to enable grading."
+    # Each document is fenced by begin/end markers so the model can tell them apart.
+    sections = [
+        GRADING_INSTRUCTIONS,
+        "\n\nQUESTION PAPER (begin):\n", question_text, "\n\nQUESTION PAPER (end)\n\n",
+        "MARKING SCHEME (begin):\n", scheme_text, "\n\nMARKING SCHEME (end)\n\n",
+        "STUDENT TRANSCRIPTION (begin):\n", student_text, "\n\nSTUDENT TRANSCRIPTION (end)\n\n",
+        "Produce the JSON grading result now.",
+    ]
+    grading_prompt = "".join(sections)
+    try:
+        return call_gemini(grading_prompt, system="You are an expert examiner and must respond only with the requested JSON.", max_tokens=2000)
+    except Exception as err:
+        return f"[Gemini grading failed: {err}]\n\n"
+# ---------- Gradio UI ------------------------------------------------------
+# Page layout and event wiring for the two-step workflow: transcribe first,
+# then grade from the stored result. Components render in creation order.
+with gr.Blocks(title="Transcribe & Grade — Exam Papers") as demo:
+    gr.Markdown("## Upload: Question Paper, Marking Scheme, Answer Sheet (PDFs)")
     with gr.Row():
+        # NOTE(review): `type="file"` looks like the Gradio 3.x spelling; newer
+        # releases may only accept "filepath"/"binary" — confirm the pinned version.
+        qp_in = gr.File(label="Question Paper (PDF)", file_count="single", type="file")
+        ms_in = gr.File(label="Marking Scheme (PDF)", file_count="single", type="file")
+        ans_in = gr.File(label="Answer Sheet (PDF)", file_count="single", type="file")
+    trans_btn = gr.Button("Transcribe Answer Sheet")
+    transcription_out = gr.Textbox(lines=20, label="Transcription (Markdown + LaTeX)", interactive=False)
+    # Holds the dict returned by transcribe_step; starts as None, which
+    # grade_step treats as "transcription has not been run yet".
+    state_store = gr.State(value=None)
+    # `_state` receives the current state_store value but is not used: every
+    # transcription builds a fresh state from the uploaded files.
+    def _on_transcribe(qp, ms, ans, _state):
+        trans, new_state = transcribe_step(qp, ms, ans)
+        return trans, new_state
+    trans_btn.click(_on_transcribe, inputs=[qp_in, ms_in, ans_in, state_store], outputs=[transcription_out, state_store])
+    gr.Markdown("## Grading")
+    grade_btn = gr.Button("Grade from Transcription")
+    grading_out = gr.Textbox(lines=20, label="Grading Result (JSON)", interactive=False)
+    # Thin forwarder: grading works only from the stored state, not from the
+    # text currently shown in the transcription box.
+    def _on_grade(_state):
+        return grade_step(_state)
+    grade_btn.click(_on_grade, inputs=[state_store], outputs=[grading_out])
+    gr.Markdown("### Notes")
+    gr.Markdown(
+        "- First click **Transcribe Answer Sheet**. Review the transcription output.\n"
+        "- Then click **Grade from Transcription** to produce the JSON grading result.\n"
+        "- If you see messages about Gemini not being configured, set `GEMINI_API_KEY` in your environment and restart the app.\n"
+        "- Adjust `MODEL_NAME` at the top of this file if you want a different Gemini model."
+    )
+# ---------- Run -----------------------------------------------------------
+# Only start the server when executed as a script; binds to all interfaces on
+# port 7860 and does not request a public share link.
+if __name__ == "__main__":
+    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)