atz21 committed on
Commit
1c7ddd9
·
verified ·
1 Parent(s): 03fd1fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +218 -66
app.py CHANGED
@@ -1,77 +1,229 @@
 
 
1
  import gradio as gr
2
- import google.generativeai as genai
3
-
4
import os

# Configure Gemini. The key is read from the GEMINI_API_KEY environment
# variable so a real secret never has to be written into this file; the
# original placeholder is kept as the fallback value.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "YOUR_API_KEY_HERE")
genai.configure(api_key=GEMINI_API_KEY)

# Initialize the model once at import time; temperature 0 is passed through
# generation_config.
model = genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
10
-
11
- # ---------- STEP 1: TRANSCRIPTION ----------
12
def transcribe_files(qp_file, ms_file, ans_file):
    """Transcribe the uploaded answer sheet into Markdown+LaTeX with Gemini.

    Args:
        qp_file: Uploaded question paper. Not used by this step; accepted so
            the handler matches the inputs wired to the button.
        ms_file: Uploaded marking scheme. Not used by this step.
        ans_file: Uploaded answer sheet; its ``name`` attribute is passed as
            the path to ``genai.upload_file``.

    Returns:
        The transcription text, or a short message when no answer sheet was
        supplied or the model produced no text.
    """
    # Without this guard a click with no upload fails on ``None.name``.
    if ans_file is None:
        return "Please upload the answer sheet before transcribing."

    # Upload Answer Sheet
    uploaded_as = genai.upload_file(path=ans_file.name, display_name="Answer Sheet")

    transcription_instructions = """
    Persona:
    You are an expert transcriptionist specializing in scientific and mathematical documents.
    Your task is to transcribe the provided handwritten student solutions into Markdown+LaTeX.
    Follow these rules:
    - Use LaTeX for all math ($ ... $ or $$ ... $$).
    - Do not correct mistakes, just transcribe.
    - Ignore strikethroughs.
    - Use **bold** for question numbering.
    - Preserve step-by-step derivations.
    """

    response = model.generate_content([transcription_instructions, uploaded_as])

    # NOTE(review): the SDK's ``text`` accessor is documented to raise (not
    # return None) when the response holds no usable part, so it is wrapped
    # here instead of relying on getattr's default. Confirm against the SDK.
    try:
        transcription = response.text
    except (AttributeError, ValueError):
        transcription = None

    if not transcription and getattr(response, "candidates", None):
        # Join every text part of the first candidate; tolerates an empty
        # parts list, which indexing parts[0] would not.
        parts = response.candidates[0].content.parts
        transcription = "".join(getattr(part, "text", "") or "" for part in parts)

    return transcription or "No transcription generated."
33
-
34
- # ---------- STEP 2: GRADING ----------
35
def grade_files(qp_file, ms_file, ans_file, transcription):
    """Grade a transcription against the question paper and marking scheme.

    Args:
        qp_file: Uploaded question paper; ``qp_file.name`` is uploaded to Gemini.
        ms_file: Uploaded marking scheme; ``ms_file.name`` is uploaded to Gemini.
        ans_file: Uploaded answer sheet. Not used by this step; accepted so
            the handler matches the inputs wired to the button.
        transcription: Text of the student's answers to be graded.

    Returns:
        The grading text, or a short message when inputs are missing or the
        model produced no text.
    """
    # Fail with a readable message instead of ``None.name`` AttributeError.
    if qp_file is None or ms_file is None:
        return "Please upload the question paper and marking scheme before grading."
    if not transcription or not transcription.strip():
        return "Please transcribe the answer sheet before grading."

    # Upload QP and MS
    uploaded_qp = genai.upload_file(path=qp_file.name, display_name="Question Paper")
    uploaded_ms = genai.upload_file(path=ms_file.name, display_name="Marking Scheme")

    grading_system = """
    Instructions to Examiners:
    - M: Method marks
    - A: Accuracy marks
    - FT: Follow-through rules
    - Apply marking strictly as per scheme.
    """

    response = model.generate_content([
        f"You are an examiner. Grade the transcription using the rules:\n{grading_system}",
        uploaded_qp,
        uploaded_ms,
        transcription,
    ])

    # NOTE(review): the SDK's ``text`` accessor is documented to raise (not
    # return None) when the response holds no usable part — confirm.
    try:
        grading = response.text
    except (AttributeError, ValueError):
        grading = None

    if not grading and getattr(response, "candidates", None):
        # Join every text part of the first candidate; safe on empty parts.
        parts = response.candidates[0].content.parts
        grading = "".join(getattr(part, "text", "") or "" for part in parts)

    return grading or "No grading generated."
58
-
59
- # ---------- GRADIO UI ----------
60
with gr.Blocks() as demo:
    gr.Markdown("## 📘 Automated Transcription & Grading System")

    # Three upload slots laid out side by side.
    with gr.Row():
        qp = gr.File(label="Upload Question Paper (PDF)")
        ms = gr.File(label="Upload Marking Scheme (PDF)")
        ans = gr.File(label="Upload Answer Sheet (PDF)")
    uploads = [qp, ms, ans]

    # Step 1 widgets: button plus the box that receives the transcription.
    transcribe_btn = gr.Button("🔍 Transcribe Answer Sheet")
    transcription_output = gr.Textbox(label="Transcription", lines=20)

    # Step 2 widgets: button plus the box that receives the grading.
    grade_btn = gr.Button("✅ Grade Answers")
    grading_output = gr.Textbox(label="Grading Result", lines=20)

    # Wiring: grading receives the uploads and the current transcription text.
    transcribe_btn.click(
        fn=transcribe_files,
        inputs=uploads,
        outputs=transcription_output,
    )
    grade_btn.click(
        fn=grade_files,
        inputs=uploads + [transcription_output],
        outputs=grading_output,
    )

demo.launch()
 
 
 
1
+ # app.py
2
+ import os
3
  import gradio as gr
4
+ import PyPDF2
5
+ import traceback
6
+
7
+ try:
8
+ import google.generativeai as genai
9
+ except Exception:
10
+ genai = None
11
+
12
+ # ---------- Configuration ---------------------------------------------------
13
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
MODEL_NAME = "gemini-2.5-pro"  # change if needed

# `model` stays None unless the SDK imported, a key is present, and both
# configuration and model construction succeed.
model = None
if genai and GEMINI_API_KEY:
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        # instantiate model object (older SDK style)
        model = genai.GenerativeModel(MODEL_NAME)
    except Exception as e:
        print("Warning: could not configure genai:", e)
        model = None
26
+
27
+ # ---------- Utilities -------------------------------------------------------
28
def extract_text_from_pdf(file_obj) -> str:
    """Extract text from a PDF file-like object using PyPDF2.

    Args:
        file_obj: A readable, seekable binary file-like object.

    Returns:
        The text of all pages that yielded any, joined by blank lines and
        stripped. If PyPDF2 fails, a best-effort fallback is used: a payload
        that is not a PDF is decoded as text, while a real PDF (or an
        unreadable object) produces an ``[Error extracting text: ...]`` marker
        rather than decoded binary noise.
    """
    try:
        # PyPDF2 PdfReader can read file-like objects
        reader = PyPDF2.PdfReader(file_obj)
        page_texts = [page.extract_text() for page in reader.pages]
        return "\n\n".join(text for text in page_texts if text).strip()
    except Exception as e:  # broad on purpose: any parser failure triggers the fallback
        error_marker = f"[Error extracting text: {e}]"
        try:
            file_obj.seek(0)
            raw = file_obj.read()
        except Exception:
            return error_marker

        # Decoding the bytes of an actual PDF yields object streams and
        # compressed data, not the document's text, so report the error.
        if not isinstance(raw, (bytes, bytearray)) or raw.lstrip().startswith(b"%PDF-"):
            return error_marker

        # best-effort decode for payloads that were never PDFs (e.g. plain text)
        return raw.decode(errors="ignore")
51
+
52
+ # ---------- Prompt templates ------------------------------------------------
53
# Prompt placed ahead of the extracted answer-sheet text when a transcription
# is requested. The numbered rules fix the output format (Markdown + LaTeX)
# and the markers used for illegible ([???]) and blank ([BLANK]) answers.
TRANSCRIPTION_INSTRUCTIONS = """
You are an expert transcriber. Cleanly transcribe the student's answer sheet contained below.
Rules:
1. Keep section headings as Markdown headings (e.g., ## Question 1).
2. Render any mathematical notation using LaTeX between $...$ for inline or $$...$$ for display.
3. Preserve numbering and sub-numbering (a), (i), etc.
4. If handwriting or characters are illegible or missing, mark them as [???] inline.
5. Normalize spacing, remove repeated hyphens/headers from PDF conversion noise.
6. For any short answer where student left blank, write [BLANK].
7. Output ONLY the transcription in well-formatted Markdown with LaTeX where appropriate.
8. Keep the transcription faithful; do not "correct" student's conceptual errors.
"""
65
+
66
# Prompt placed ahead of the question paper, marking scheme and transcription
# when grading is requested. Rule 1 allows fractional marks, so the awarded
# fields in the schema are typed <number>; maximum/total marks remain <int>.
GRADING_INSTRUCTIONS = """
You are an experienced examiner. Use the Question Paper (QP), the Marking Scheme (MS), and the STUDENT TRANSCRIPTION to grade the student's answers.
Rules:
1. Follow the MS strictly: allocate marks per the marking scheme and apply fractional marks when indicated.
2. If the student's answer is missing or [BLANK], award 0 marks for that part unless MS instructs otherwise.
3. When partial credit applies, explain what was missing and why partial marks were given.
4. If the student copied the question or gave an irrelevant answer, award 0 and add a brief reason.
5. Use negative marking only if MS explicitly instructs it.
6. Output the grading result as a JSON object ONLY (no extra commentary) with the following structure:

{
  "total_marks": <int>,
  "marks_obtained": <number>,
  "percentage": <float>,
  "per_question": {
    "Q1": {"max_marks": <int>, "awarded": <number>, "notes": "<string>"},
    "Q2": {...}
  },
  "high_level_feedback": "<short summary feedback to student (1-3 sentences)>"
}

Make sure numeric fields are numeric (not strings). Use plain JSON (no markdown fences).
"""
89
+
90
+ # ---------- Model functions -------------------------------------------------
91
def _response_text(resp):
    """Return the text carried by a generate_content response, or None."""
    # NOTE(review): the SDK's ``text`` accessor is documented to raise when
    # the response has no usable part (e.g. blocked output) — confirm.
    try:
        text = resp.text
    except (AttributeError, ValueError):
        text = None
    if text is not None:
        return text

    # Fall back to the first candidate that carries any text parts.
    for candidate in getattr(resp, "candidates", None) or []:
        parts = getattr(getattr(candidate, "content", None), "parts", None) or []
        chunks = [part.text for part in parts if getattr(part, "text", None)]
        if chunks:
            return "".join(chunks)
    return None


def call_gemini(prompt: str, system: str = None, max_tokens: int = 1024):
    """Send a prompt to the configured Gemini model and return its text.

    Args:
        prompt: The user prompt.
        system: Optional instruction text; when given it is sent as the first
            content item, ahead of ``prompt``.
        max_tokens: Accepted for interface compatibility. It is not forwarded
            to the SDK by this function, so it does not limit the output.

    Returns:
        The text produced by the model.

    Raises:
        RuntimeError: If the model is not configured, the SDK call fails, or
            the response contains no text. Previously a text-less response
            was returned as ``str(resp)``, i.e. an object dump that callers
            would display or grade as if it were model output.
    """
    if model is None:
        raise RuntimeError("Gemini model is not configured. Set GEMINI_API_KEY and install google-generativeai.")

    # Compose contents: system instruction optionally, then the prompt.
    contents = [system, prompt] if system else [prompt]

    try:
        resp = model.generate_content(contents)
        text = _response_text(resp)
    except Exception as e:
        # bubble up a helpful message, keeping the original exception chained
        raise RuntimeError(f"Error calling Gemini: {e}\n{traceback.format_exc()}") from e

    if text is None:
        raise RuntimeError("Error calling Gemini: the response contained no text.")
    return text
115
+
116
+ # ---------- Gradio app functions -------------------------------------------
117
def _read_uploaded_pdf(upload, label):
    """Return the extracted text of one upload, or a bracketed error marker.

    Accepts a wrapper exposing ``.file``, a plain file-like object, or a path
    (a string, or an object whose ``name`` attribute is the path).
    """
    try:
        handle = getattr(upload, "file", None)
        if handle is None and hasattr(upload, "read"):
            handle = upload
        if handle is not None:
            handle.seek(0)
            return extract_text_from_pdf(handle)

        path = getattr(upload, "name", upload)
        with open(path, "rb") as fh:
            return extract_text_from_pdf(fh)
    except Exception as e:
        return f"[Error reading {label} PDF: {e}]"


def transcribe_step(question_pdf, scheme_pdf, answer_pdf):
    """Extract text from the three uploads and transcribe the answer sheet.

    Args:
        question_pdf: Uploaded question paper.
        scheme_pdf: Uploaded marking scheme.
        answer_pdf: Uploaded answer sheet.

    Returns:
        A ``(transcription, state)`` tuple. ``state`` is a dict with keys
        ``q_text``, ``ms_text``, ``ans_text`` and ``transcription`` for the
        grading step, or ``None`` when any upload is missing (in which case
        ``transcription`` is a message asking for all three files).
    """
    # check files present
    if not (question_pdf and scheme_pdf and answer_pdf):
        return "Please upload all three PDFs (Question Paper, Marking Scheme, Answer Sheet).", None

    q_text = _read_uploaded_pdf(question_pdf, "Question Paper")
    ms_text = _read_uploaded_pdf(scheme_pdf, "Marking Scheme")
    ans_text = _read_uploaded_pdf(answer_pdf, "Answer Sheet")

    if not ans_text.strip():
        # Nothing to transcribe: asking the model anyway would invite an
        # invented transcription, so report the situation instead.
        transcription = "[No extractable text found in the Answer Sheet PDF.]"
    elif model:
        transcription_prompt = (
            TRANSCRIPTION_INSTRUCTIONS
            + "\n\n"
            + "ANSWER SHEET CONTENT (begin):\n"
            + ans_text
            + "\n\n(END of answer sheet)"
        )
        try:
            transcription = call_gemini(
                transcription_prompt,
                system="You are a precise transcription assistant.",
                max_tokens=2000,
            )
        except Exception as e:
            transcription = f"[Gemini transcription failed: {e}]\n\nFalling back to raw extracted text:\n\n" + ans_text
    else:
        transcription = "[Gemini not configured — showing best-effort extracted text]\n\n" + ans_text

    # state to carry forward
    state = {
        "q_text": q_text,
        "ms_text": ms_text,
        "ans_text": ans_text,
        "transcription": transcription,
    }
    return transcription, state
162
+
163
def _strip_code_fence(text):
    """Remove a surrounding Markdown code fence (``` or ```json) if present."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return text
    # Drop the opening fence line, then a trailing closing fence if any.
    _, _, rest = stripped.partition("\n")
    rest = rest.rstrip()
    if rest.endswith("```"):
        rest = rest[:-3]
    return rest.strip()


def grade_step(state):
    """Grade the transcription held in ``state`` and return the result text.

    Args:
        state: The dict produced by the transcribe step (keys ``q_text``,
            ``ms_text``, ``transcription``), or ``None`` if that step has not
            been run.

    Returns:
        The model's grading reply with any wrapping Markdown code fence
        removed, or an explanatory message when the state is missing, the
        model is not configured, or the call fails.
    """
    if state is None:
        return "No transcription state found. Run the Transcribe step first."

    q_text = state.get("q_text", "")
    ms_text = state.get("ms_text", "")
    transcription = state.get("transcription", "")

    if not model:
        return "[Gemini not configured — grading unavailable.]\n\nPlease set GEMINI_API_KEY to enable grading."

    grading_prompt = (
        GRADING_INSTRUCTIONS
        + "\n\nQUESTION PAPER (begin):\n" + q_text + "\n\nQUESTION PAPER (end)\n\n"
        + "MARKING SCHEME (begin):\n" + ms_text + "\n\nMARKING SCHEME (end)\n\n"
        + "STUDENT TRANSCRIPTION (begin):\n" + transcription + "\n\nSTUDENT TRANSCRIPTION (end)\n\n"
        + "Produce the JSON grading result now."
    )
    try:
        reply = call_gemini(
            grading_prompt,
            system="You are an expert examiner and must respond only with the requested JSON.",
            max_tokens=2000,
        )
    except Exception as e:
        return f"[Gemini grading failed: {e}]\n\n"

    # The prompt asks for plain JSON, but a fenced reply is still possible;
    # strip the fence so the output box holds parseable JSON.
    return _strip_code_fence(reply)
190
+
191
+ # ---------- Gradio UI ------------------------------------------------------
192
with gr.Blocks(title="Transcribe & Grade — Exam Papers") as demo:
    gr.Markdown("## Upload: Question Paper, Marking Scheme, Answer Sheet (PDFs)")
    with gr.Row():
        # NOTE(review): the explicit type="file" was dropped because newer
        # Gradio releases appear to accept only "filepath"/"binary" for this
        # parameter — confirm against the pinned Gradio version.
        # file_types limits the picker to PDFs, matching the labels.
        qp_in = gr.File(label="Question Paper (PDF)", file_count="single", file_types=[".pdf"])
        ms_in = gr.File(label="Marking Scheme (PDF)", file_count="single", file_types=[".pdf"])
        ans_in = gr.File(label="Answer Sheet (PDF)", file_count="single", file_types=[".pdf"])

    trans_btn = gr.Button("Transcribe Answer Sheet")
    transcription_out = gr.Textbox(lines=20, label="Transcription (Markdown + LaTeX)", interactive=False)

    # Carries the extracted texts and transcription from step 1 to step 2.
    state_store = gr.State(value=None)

    def _on_transcribe(qp, ms, ans, _state=None):
        """Run the transcription step; the previous state is not needed."""
        return transcribe_step(qp, ms, ans)

    # The state is an output only: each run replaces it wholesale.
    trans_btn.click(
        _on_transcribe,
        inputs=[qp_in, ms_in, ans_in],
        outputs=[transcription_out, state_store],
    )

    gr.Markdown("## Grading")
    grade_btn = gr.Button("Grade from Transcription")
    grading_out = gr.Textbox(lines=20, label="Grading Result (JSON)", interactive=False)

    def _on_grade(_state):
        """Grade using the state saved by the transcription step."""
        return grade_step(_state)

    grade_btn.click(_on_grade, inputs=[state_store], outputs=[grading_out])

    gr.Markdown("### Notes")
    gr.Markdown(
        "- First click **Transcribe Answer Sheet**. Review the transcription output.\n"
        "- Then click **Grade from Transcription** to produce the JSON grading result.\n"
        "- If you see messages about Gemini not being configured, set `GEMINI_API_KEY` in your environment and restart the app.\n"
        "- Adjust `MODEL_NAME` at the top of this file if you want a different Gemini model."
    )
226
 
227
+ # ---------- Run -----------------------------------------------------------
228
# Start the web server only when this file is executed directly, not on import.
if __name__ == "__main__":
    # Listen on all interfaces (0.0.0.0) on port 7860, with share disabled.
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)