atz21 committed on
Commit
db86cb5
·
verified ·
1 Parent(s): cda728b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -76
app.py CHANGED
@@ -7,6 +7,11 @@ import json
7
  import traceback
8
  import re
9
  import concurrent.futures
 
 
 
 
 
10
 
11
  # ---------- PROMPTS ----------
12
  PROMPTS = {
@@ -20,17 +25,14 @@ Each object must have exactly these keys:
20
  - "qp": string (exact question text or "[Not found]")
21
  - "ms": string (relevant markscheme text or "[Not found]")
22
  - "as": string (final cleaned student answer; "[No response]" or "[illegible]" if needed)
23
-
24
  ### Numbering Rules
25
  - Always use **logical order of questions** (1, 2, 3, …) regardless of how they are labeled in the PDF.
26
  - If the QP shows a mismatch (e.g., under "Question 1" the serial number says "12"), **still treat it as Q1**.
27
  - Subparts must be written in standard form (e.g., "1(a)", "1(b)(ii)").
28
-
29
  ### Formatting Rules
30
  - Preserve math inside fenced code ```...```.
31
  - If diagram/graph missing, write "[Graph omitted]".
32
  - Do not add extra commentary outside JSON.
33
-
34
  ## Example
35
  [
36
  {
@@ -52,7 +54,6 @@ Each object must have exactly these keys:
52
  - **AG**: Answer given in question—no marks
53
  - **FT**: Follow Through marks (if error carried forward correctly)
54
  - **MR**: Deduct for misread (once only)
55
-
56
  ---
57
  ## Grading Instructions
58
  1. Award marks using official annotations (e.g., M1, A2).
@@ -62,16 +63,13 @@ Each object must have exactly these keys:
62
  5. Apply FT where appropriate.
63
  6. Use proper notation: M1A0, A1, etc.
64
  7. Any lost mark: use red `<span style="color:red">M0</span>` and make Reason red.
65
-
66
  ---
67
  ## Output Format
68
  1. Produce a GitHub-flavored Markdown table with columns:
69
  | Student wrote | Marks Awarded | Reason |
70
  - Each row = one markable step/point, in order.
71
  - For blanks: “(no answer)” with marks lost.
72
-
73
  2. After the table, write ONLY one line for total marks in the form: Final Marks: X / Y
74
-
75
  ⚠️ Do NOT include summaries, error classifications, or extra commentary.
76
  Only table + final marks line.
77
  """
@@ -83,7 +81,6 @@ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
83
 
84
  # ---------- HELPER: Save to PDF ----------
85
  def save_as_pdf(text, filename="output.pdf"):
86
- print(f"📄 Saving grading report to PDF: {filename}")
87
  pdf = MarkdownPdf()
88
  pdf.add_section(Section(text, toc=False))
89
  pdf.save(filename)
@@ -91,17 +88,14 @@ def save_as_pdf(text, filename="output.pdf"):
91
 
92
  # ---------- HELPER: Compress PDF ----------
93
  def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
94
- print(f"🗜️ Checking if compression needed for {input_path}...")
95
  if output_path is None:
96
  base, ext = os.path.splitext(input_path)
97
  output_path = f"{base}_compressed{ext}"
98
 
99
  if os.path.getsize(input_path) <= max_size:
100
- print("✅ No compression needed")
101
  return input_path
102
 
103
  try:
104
- print(f"⚡ Compressing {input_path} → {output_path}")
105
  gs_cmd = [
106
  "gs", "-sDEVICE=pdfwrite",
107
  "-dCompatibilityLevel=1.4",
@@ -110,24 +104,18 @@ def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
110
  f"-sOutputFile={output_path}", input_path
111
  ]
112
  subprocess.run(gs_cmd, check=True)
113
-
114
  if os.path.getsize(output_path) <= max_size:
115
- print("✅ Compression successful")
116
  return output_path
117
  else:
118
- print("⚠️ Compression did not reduce size enough, using original")
119
  return input_path
120
- except Exception as e:
121
- print(f"❌ Compression failed: {e}")
122
  return input_path
123
 
124
  # ---------- HELPER: Create Model with Fallback ----------
125
  def create_model():
126
  try:
127
- print("⚡ Using gemini-2.5-pro model")
128
  return genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
129
  except Exception:
130
- print("⚡ Falling back to gemini-2.5-flash model")
131
  return genai.GenerativeModel("gemini-2.5-flash", generation_config={"temperature": 0})
132
 
133
  # ---------- HELPER: Clean JSON Output ----------
@@ -141,26 +129,21 @@ def clean_json_output(raw_text: str) -> str:
141
  return cleaned
142
 
143
  # ---------- PIPELINE: ALIGN + GRADE ----------
144
- def align_and_grade(qp_file, ms_file, ans_file):
145
  try:
146
- print("🚀 Starting alignment + grading pipeline")
147
-
148
- # Step 0: Compress if needed
149
- print("🔍 Step 0: Compressing PDFs (if needed)")
150
  qp_file = compress_pdf(qp_file, "qp_compressed.pdf")
151
  ms_file = compress_pdf(ms_file, "ms_compressed.pdf")
152
  ans_file = compress_pdf(ans_file, "ans_compressed.pdf")
153
 
154
  # Step 1: Uploads
155
- print("📤 Step 1: Uploading PDFs to Gemini...")
156
  qp_uploaded = genai.upload_file(path=qp_file, display_name="Question Paper")
157
  ms_uploaded = genai.upload_file(path=ms_file, display_name="Markscheme")
158
  ans_uploaded = genai.upload_file(path=ans_file, display_name="Answer Sheet")
159
 
160
  model = create_model()
161
 
162
- # Step 2: Alignment → JSON
163
- print("🧩 Step 2: Aligning QP, MS, and AS into JSON...")
164
  resp = model.generate_content([
165
  PROMPTS["ALIGNMENT_PROMPT"]["content"],
166
  qp_uploaded,
@@ -172,21 +155,11 @@ def align_and_grade(qp_file, ms_file, ans_file):
172
  aligned_json = resp.candidates[0].content.parts[0].text
173
 
174
  aligned_json = clean_json_output(aligned_json)
 
175
 
176
- try:
177
- questions = json.loads(aligned_json)
178
- print(f"✅ Parsed JSON with {len(questions)} questions")
179
- except Exception as e:
180
- print("❌ JSON parsing failed")
181
- traceback.print_exc()
182
- return f"❌ JSON parsing error: {e}", None, None
183
-
184
- # Step 3: Grading (parallelized but order preserved)
185
- print("📝 Step 3: Grading each question in parallel...")
186
-
187
  def grade_one(idx_q):
188
  idx, q = idx_q
189
- print(f" ➡️ Grading Question {q['question_number']}")
190
  q_json = json.dumps(q, indent=2)
191
  response = model.generate_content([
192
  PROMPTS["GRADING_PROMPT"]["content"],
@@ -199,61 +172,120 @@ def align_and_grade(qp_file, ms_file, ans_file):
199
 
200
  with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
201
  results = list(executor.map(grade_one, enumerate(questions)))
202
-
203
- # Sort results back into original order
204
  results.sort(key=lambda x: x[0])
205
 
206
- # Step 4: Build report and marks summary
207
  grading_sections = []
208
- marks_summary = []
209
- total_awarded, total_possible = 0, 0
210
-
211
  for _, qnum, grading_piece in results:
212
  section = f"## Question {qnum}\n\n{grading_piece}"
213
  grading_sections.append(section)
214
 
215
- # Extract marks from "Final Marks: X / Y"
216
- match = re.search(r"Final Marks:\s*(\d+)\s*/\s*(\d+)", grading_piece)
217
- if match:
218
- awarded, possible = int(match.group(1)), int(match.group(2))
219
- marks_summary.append((qnum, awarded, possible))
220
- total_awarded += awarded
221
- total_possible += possible
222
- else:
223
- marks_summary.append((qnum, 0, 0))
224
-
225
- # Build summary table
226
- summary_table = ["\n\n# Final Marks Summary\n",
227
- "| Question | Marks Awarded | Total Marks |",
228
- "|----------|---------------|-------------|"]
229
- for qnum, awarded, possible in marks_summary:
230
- summary_table.append(f"| {qnum} | {awarded} | {possible} |")
231
- summary_table.append(f"| **Total** | **{total_awarded}** | **{total_possible}** |")
232
-
233
- grading_report = "\n\n".join(grading_sections) + "\n".join(summary_table)
234
-
235
- # Step 5: Save grading report (Markdown → PDF)
236
- print("📄 Step 5: Saving grading report to PDF...")
237
  base_name = os.path.splitext(os.path.basename(ans_file))[0]
238
  grading_pdf_path = save_as_pdf(grading_report, f"{base_name}_graded.pdf")
239
 
240
- print("✅ Pipeline completed successfully")
241
- return json.dumps(questions, indent=2), grading_report, grading_pdf_path
 
 
 
242
 
243
  except Exception as e:
244
- print("❌ Fatal error in pipeline")
245
  traceback.print_exc()
246
- return f"❌ Error: {e}", None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
 
248
  # ---------- GRADIO APP ----------
249
- with gr.Blocks(title="LeadIB AI Grading (JSON Alignment + Auto-Grading)") as demo:
250
- gr.Markdown("## LeadIB AI Grading\nUpload Question Paper, Markscheme, and Student Answer Sheet.\nThe system will align (as JSON) and grade automatically.")
251
 
252
  with gr.Row():
253
  qp_file = gr.File(label="Upload Question Paper (PDF)", type="filepath")
254
  ms_file = gr.File(label="Upload Markscheme (PDF)", type="filepath")
255
  ans_file = gr.File(label="Upload Student Answer Sheet (PDF)", type="filepath")
256
 
 
257
  run_btn = gr.Button("Start Alignment + Auto-Grading")
258
 
259
  with gr.Row():
@@ -261,16 +293,17 @@ with gr.Blocks(title="LeadIB AI Grading (JSON Alignment + Auto-Grading)") as dem
261
 
262
  with gr.Row():
263
  grading_out = gr.Textbox(label="✅ Grading Report (Markdown)", lines=20)
 
 
264
  grading_pdf = gr.File(label="⬇️ Download Grading Report (PDF)")
 
265
 
266
  run_btn.click(
267
  fn=align_and_grade,
268
- inputs=[qp_file, ms_file, ans_file],
269
- outputs=[aligned_out, grading_out, grading_pdf],
270
  show_progress=True
271
  )
272
 
273
  if __name__ == "__main__":
274
  demo.launch()
275
-
276
-
 
7
  import traceback
8
  import re
9
  import concurrent.futures
10
+ from pdf2image import convert_from_path
11
+ from PIL import Image, ImageDraw, ImageFont
12
+ import cv2
13
+ import numpy as np
14
+ import img2pdf
15
 
16
  # ---------- PROMPTS ----------
17
  PROMPTS = {
 
25
  - "qp": string (exact question text or "[Not found]")
26
  - "ms": string (relevant markscheme text or "[Not found]")
27
  - "as": string (final cleaned student answer; "[No response]" or "[illegible]" if needed)
 
28
  ### Numbering Rules
29
  - Always use **logical order of questions** (1, 2, 3, …) regardless of how they are labeled in the PDF.
30
  - If the QP shows a mismatch (e.g., under "Question 1" the serial number says "12"), **still treat it as Q1**.
31
  - Subparts must be written in standard form (e.g., "1(a)", "1(b)(ii)").
 
32
  ### Formatting Rules
33
  - Preserve math inside fenced code ```...```.
34
  - If diagram/graph missing, write "[Graph omitted]".
35
  - Do not add extra commentary outside JSON.
 
36
  ## Example
37
  [
38
  {
 
54
  - **AG**: Answer given in question—no marks
55
  - **FT**: Follow Through marks (if error carried forward correctly)
56
  - **MR**: Deduct for misread (once only)
 
57
  ---
58
  ## Grading Instructions
59
  1. Award marks using official annotations (e.g., M1, A2).
 
63
  5. Apply FT where appropriate.
64
  6. Use proper notation: M1A0, A1, etc.
65
  7. Any lost mark: use red `<span style="color:red">M0</span>` and make Reason red.
 
66
  ---
67
  ## Output Format
68
  1. Produce a GitHub-flavored Markdown table with columns:
69
  | Student wrote | Marks Awarded | Reason |
70
  - Each row = one markable step/point, in order.
71
  - For blanks: “(no answer)” with marks lost.
 
72
  2. After the table, write ONLY one line for total marks in the form: Final Marks: X / Y
 
73
  ⚠️ Do NOT include summaries, error classifications, or extra commentary.
74
  Only table + final marks line.
75
  """
 
81
 
82
  # ---------- HELPER: Save to PDF ----------
83
  def save_as_pdf(text, filename="output.pdf"):
 
84
  pdf = MarkdownPdf()
85
  pdf.add_section(Section(text, toc=False))
86
  pdf.save(filename)
 
88
 
89
  # ---------- HELPER: Compress PDF ----------
90
  def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
 
91
  if output_path is None:
92
  base, ext = os.path.splitext(input_path)
93
  output_path = f"{base}_compressed{ext}"
94
 
95
  if os.path.getsize(input_path) <= max_size:
 
96
  return input_path
97
 
98
  try:
 
99
  gs_cmd = [
100
  "gs", "-sDEVICE=pdfwrite",
101
  "-dCompatibilityLevel=1.4",
 
104
  f"-sOutputFile={output_path}", input_path
105
  ]
106
  subprocess.run(gs_cmd, check=True)
 
107
  if os.path.getsize(output_path) <= max_size:
 
108
  return output_path
109
  else:
 
110
  return input_path
111
+ except Exception:
 
112
  return input_path
113
 
114
  # ---------- HELPER: Create Model with Fallback ----------
115
  def create_model():
116
  try:
 
117
  return genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
118
  except Exception:
 
119
  return genai.GenerativeModel("gemini-2.5-flash", generation_config={"temperature": 0})
120
 
121
  # ---------- HELPER: Clean JSON Output ----------
 
129
  return cleaned
130
 
131
  # ---------- PIPELINE: ALIGN + GRADE ----------
132
+ def align_and_grade(qp_file, ms_file, ans_file, imprint=False):
133
  try:
134
+ # Step 0: Compress
 
 
 
135
  qp_file = compress_pdf(qp_file, "qp_compressed.pdf")
136
  ms_file = compress_pdf(ms_file, "ms_compressed.pdf")
137
  ans_file = compress_pdf(ans_file, "ans_compressed.pdf")
138
 
139
  # Step 1: Uploads
 
140
  qp_uploaded = genai.upload_file(path=qp_file, display_name="Question Paper")
141
  ms_uploaded = genai.upload_file(path=ms_file, display_name="Markscheme")
142
  ans_uploaded = genai.upload_file(path=ans_file, display_name="Answer Sheet")
143
 
144
  model = create_model()
145
 
146
+ # Step 2: Alignment
 
147
  resp = model.generate_content([
148
  PROMPTS["ALIGNMENT_PROMPT"]["content"],
149
  qp_uploaded,
 
155
  aligned_json = resp.candidates[0].content.parts[0].text
156
 
157
  aligned_json = clean_json_output(aligned_json)
158
+ questions = json.loads(aligned_json)
159
 
160
+ # Step 3: Grading
 
 
 
 
 
 
 
 
 
 
161
  def grade_one(idx_q):
162
  idx, q = idx_q
 
163
  q_json = json.dumps(q, indent=2)
164
  response = model.generate_content([
165
  PROMPTS["GRADING_PROMPT"]["content"],
 
172
 
173
  with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
174
  results = list(executor.map(grade_one, enumerate(questions)))
 
 
175
  results.sort(key=lambda x: x[0])
176
 
177
+ # Step 4: Build report
178
  grading_sections = []
179
+ grading_json = {"grading": []}
 
 
180
  for _, qnum, grading_piece in results:
181
  section = f"## Question {qnum}\n\n{grading_piece}"
182
  grading_sections.append(section)
183
 
184
+ # Extract marks list
185
+ marks_list = re.findall(r"(M[01]|A[0-9]|R[01])", grading_piece)
186
+ grading_json["grading"].append({"question": qnum, "marks_awarded": marks_list})
187
+
188
+ grading_report = "\n\n".join(grading_sections)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  base_name = os.path.splitext(os.path.basename(ans_file))[0]
190
  grading_pdf_path = save_as_pdf(grading_report, f"{base_name}_graded.pdf")
191
 
192
+ imprint_pdf_path = None
193
+ if imprint:
194
+ imprint_pdf_path = imprint_marks(ans_file, grading_json, model)
195
+
196
+ return json.dumps(questions, indent=2), grading_report, grading_pdf_path, imprint_pdf_path
197
 
198
  except Exception as e:
 
199
  traceback.print_exc()
200
+ return f"❌ Error: {e}", None, None, None
201
+
202
+ # ---------- PIPELINE: IMPRINT MARKS ----------
203
def _parse_cell_mapping(mapping_text):
    """Extract the first-to-last bracketed JSON array from model output; [] on any failure."""
    match = re.search(r'\[.*\]', mapping_text or "", re.DOTALL)
    if not match:
        return []
    try:
        parsed = json.loads(match.group(0))
    except json.JSONDecodeError:
        # Malformed model output should not abort the whole imprint step.
        return []
    return parsed if isinstance(parsed, list) else []


def _load_grid_font(size=20):
    """Load the font used for grid cell numbers, falling back to PIL's default."""
    try:
        return ImageFont.truetype("arial.ttf", size)
    except OSError:
        return ImageFont.load_default()


def imprint_marks(ans_pdf, grading_json, model, grid_rows=20, grid_cols=14):
    """Write the awarded marks onto the pages of the answer-sheet PDF.

    Each page is rendered at 200 dpi and saved as a copy with a numbered
    ``grid_rows`` x ``grid_cols`` grid. ``model`` is asked, per page, to map
    questions to grid cell numbers. The marks recorded for each mapped
    question are drawn in red on a clean copy of the page, and the annotated
    pages are combined into a single PDF.

    Args:
        ans_pdf: Path of the answer-sheet PDF.
        grading_json: Dict with a "grading" list whose entries have the keys
            "question" and "marks_awarded" (a list of mark strings).
        model: Object exposing ``generate_content(list)``; the response's
            ``text`` attribute is read.
        grid_rows: Number of grid rows overlaid on every page.
        grid_cols: Number of grid columns overlaid on every page.

    Returns:
        "answer_sheet_with_marks.pdf" (relative path) once written, or
        ``None`` when the PDF rendered to zero pages.

    Intermediate PNGs are written to the "grid_pages" directory. Mapping
    entries that are malformed (not a dict, missing keys, non-integer or
    out-of-range cell number) are skipped instead of raising.
    """
    output_dir = "grid_pages"
    os.makedirs(output_dir, exist_ok=True)
    pages = convert_from_path(ans_pdf, dpi=200)
    if not pages:
        # Nothing to annotate; avoid handing an empty list to img2pdf.
        return None

    # Loop-invariant work: one font, one prompt, one grading list.
    num_font = _load_grid_font(20)
    total_cells = grid_rows * grid_cols
    grading_entries = grading_json.get("grading", [])
    prompt = f"""
You are an exam marker. The page is divided into a {grid_rows} x {grid_cols} grid with numbered cells.
Return JSON: [{{"question": "1(a)", "cell_number": 15}}, ...]
Grading JSON:
{json.dumps(grading_json, indent=2)}
"""

    # Create grid images: the cell number is centred inside every cell.
    page_images = []
    for i, page in enumerate(pages):
        img_path = os.path.join(output_dir, f"page_{i+1}_grid.png")
        img = page.convert("RGB")
        draw = ImageDraw.Draw(img)
        w, h = img.size
        cell_w, cell_h = w / grid_cols, h / grid_rows

        for r in range(grid_rows):
            for c in range(grid_cols):
                x = int(c * cell_w + cell_w / 2)
                y = int(r * cell_h + cell_h / 2)
                text = str(r * grid_cols + c + 1)
                bbox = draw.textbbox((0, 0), text, font=num_font)
                tw = bbox[2] - bbox[0]
                th = bbox[3] - bbox[1]
                draw.text((x - tw/2, y - th/2), text, fill="black", font=num_font)
        img.save(img_path, "PNG")
        page_images.append(img_path)

    annotated_pages = []
    for idx, page in enumerate(pages):
        # Ask the model for a question -> cell mapping; close the image handle afterwards.
        with Image.open(page_images[idx]) as grid_img:
            response = model.generate_content([prompt, grid_img])
        try:
            mapping_text = response.text or ""
        except (AttributeError, ValueError):
            # `.text` is an accessor that can raise when the response has no usable text part.
            mapping_text = ""
        mapping = _parse_cell_mapping(mapping_text)

        # Annotate the clean page (without the grid numbers).
        img = np.array(page.convert("RGB"))
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        h, w, _ = img.shape
        cell_w, cell_h = w / grid_cols, h / grid_rows

        for item in mapping:
            if not isinstance(item, dict):
                continue
            q = item.get("question")
            try:
                cell_number = int(item.get("cell_number"))
            except (TypeError, ValueError):
                continue
            if not 1 <= cell_number <= total_cells:
                # A cell outside the grid would place the text off the page.
                continue
            row = (cell_number - 1) // grid_cols
            col = (cell_number - 1) % grid_cols

            marks_list = next(
                (g["marks_awarded"] for g in grading_entries if g["question"] == q),
                [],
            )
            marks_text = ",".join(marks_list)
            if not marks_text:
                continue

            # Anchor near the right edge of the cell, vertically centred.
            x_c = int((col+1) * cell_w - cell_w/4)
            y_c = int((row+0.5) * cell_h)
            cv2.putText(img, marks_text, (x_c, y_c), cv2.FONT_HERSHEY_SIMPLEX,
                        1.5, (0, 0, 255), 3, cv2.LINE_AA)

        annotated_path = os.path.join(output_dir, f"annotated_{idx+1}.png")
        cv2.imwrite(annotated_path, img)
        annotated_pages.append(annotated_path)

    output_pdf = "answer_sheet_with_marks.pdf"
    with open(output_pdf, "wb") as f:
        f.write(img2pdf.convert(annotated_pages))
    return output_pdf
278
 
279
  # ---------- GRADIO APP ----------
280
+ with gr.Blocks(title="LeadIB AI Grading with Optional Imprinting") as demo:
281
+ gr.Markdown("## LeadIB AI Grading\nUpload QP, MS, and AS. Get aligned JSON, grading report, and optionally imprint marks on the answer sheet.")
282
 
283
  with gr.Row():
284
  qp_file = gr.File(label="Upload Question Paper (PDF)", type="filepath")
285
  ms_file = gr.File(label="Upload Markscheme (PDF)", type="filepath")
286
  ans_file = gr.File(label="Upload Student Answer Sheet (PDF)", type="filepath")
287
 
288
+ imprint_opt = gr.Checkbox(label="Imprint Marks on Answer Sheet?", value=False)
289
  run_btn = gr.Button("Start Alignment + Auto-Grading")
290
 
291
  with gr.Row():
 
293
 
294
  with gr.Row():
295
  grading_out = gr.Textbox(label="✅ Grading Report (Markdown)", lines=20)
296
+
297
+ with gr.Row():
298
  grading_pdf = gr.File(label="⬇️ Download Grading Report (PDF)")
299
+ imprint_pdf = gr.File(label="⬇️ Download Answer Sheet with Imprinted Marks (PDF)")
300
 
301
  run_btn.click(
302
  fn=align_and_grade,
303
+ inputs=[qp_file, ms_file, ans_file, imprint_opt],
304
+ outputs=[aligned_out, grading_out, grading_pdf, imprint_pdf],
305
  show_progress=True
306
  )
307
 
308
  if __name__ == "__main__":
309
  demo.launch()