Spaces:
Sleeping
Sleeping
File size: 39,074 Bytes
419e7c7 6941b48 419e7c7 6941b48 2b866da 6941b48 419e7c7 6941b48 b248fb0 6941b48 f999bc3 6941b48 618ee94 f999bc3 618ee94 6941b48 618ee94 6941b48 618ee94 6941b48 618ee94 6941b48 618ee94 6941b48 618ee94 6941b48 618ee94 6941b48 618ee94 6941b48 618ee94 6941b48 618ee94 6941b48 618ee94 f999bc3 f92014f f999bc3 f92014f f999bc3 d146b76 f92014f f999bc3 618ee94 f999bc3 b248fb0 6941b48 b248fb0 6941b48 b248fb0 6941b48 b248fb0 6941b48 b248fb0 6941b48 b248fb0 2b866da 419e7c7 2b866da 419e7c7 2b866da 6941b48 b248fb0 f1f0ca1 b248fb0 419e7c7 b248fb0 419e7c7 6941b48 419e7c7 6941b48 2dd3b2b 419e7c7 2dd3b2b 419e7c7 2dd3b2b 419e7c7 2dd3b2b 419e7c7 6941b48 419e7c7 f1f0ca1 419e7c7 f1f0ca1 419e7c7 b248fb0 6941b48 27ea33f 2b866da 27ea33f b248fb0 bf851f8 9b0a372 a5a195e 9b0a372 bf851f8 a5a195e 9b0a372 bf851f8 a5a195e 9b0a372 bf851f8 9b0a372 bf851f8 9b0a372 bf851f8 9b0a372 bf851f8 9b0a372 bf851f8 9b0a372 a5a195e 9b0a372 a5a195e 9b0a372 a5a195e 9b0a372 a5a195e bf851f8 5268488 a5a195e 5268488 b248fb0 6941b48 b248fb0 2b866da b248fb0 6941b48 b248fb0 6941b48 b248fb0 6941b48 b248fb0 6941b48 419e7c7 559b8c9 b248fb0 559b8c9 b248fb0 559b8c9 332de5f 559b8c9 b248fb0 559b8c9 b248fb0 559b8c9 419e7c7 559b8c9 6941b48 c962bfa 419e7c7 b248fb0 559b8c9 2b866da b248fb0 559b8c9 b248fb0 559b8c9 2b866da 559b8c9 419e7c7 559b8c9 b248fb0 559b8c9 b248fb0 559b8c9 b248fb0 de357f2 b248fb0 de357f2 b248fb0 559b8c9 b248fb0 559b8c9 b248fb0 559b8c9 b248fb0 559b8c9 b248fb0 a5a195e 2b866da a5a195e 419e7c7 a5a195e 419e7c7 332de5f a5a195e 419e7c7 a5a195e 5084988 419e7c7 a5a195e 2b866da a5a195e 419e7c7 a5a195e 419e7c7 a5a195e 419e7c7 a5a195e 2b866da a5a195e b248fb0 419e7c7 b248fb0 419e7c7 b248fb0 559b8c9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 
611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 |
import os
import re
import json
import subprocess
import time
import img2pdf
import gradio as gr
from google import genai # NEW SDK
from markdown_pdf import MarkdownPdf, Section
from pdf2image import convert_from_path
from PIL import Image, ImageDraw, ImageFont
import cv2
import numpy as np
from PyPDF2 import PdfReader, PdfWriter
# ---------------- CONFIG ----------------
# Gemini client (google-genai SDK); the API key is read from the environment.
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
# Size of the numbered overlay grid drawn on answer pages: rows x columns.
GRID_ROWS = 20
GRID_COLS = 14
# ---------------- PROMPTS ----------------
# Prompts sent to Gemini. Each entry carries a chat "role" and the prompt text
# under "content". The "==== ... ====" section markers in the transcription
# prompt are what the regex helpers in this file search for in the model
# output, so keep markers and parsers in sync when editing.
# The text below had been mis-decoded (UTF-8 read as a single-byte Greek
# code page); ellipses, dashes, arrows and check marks have been restored.
# NOTE(review): in "Formatting Lost Marks" the bullet "Do use red for
# markscheme expectations or student responses themselves" reads as if a
# "NOT" is missing -- left unchanged, confirm the intended wording.
PROMPTS = {
    # Transcribes the merged Question Paper + Markscheme PDF into a fixed layout.
    "QP_MS_TRANSCRIPTION": {
        "role": "system",
        "content": """You are a high-quality OCR/Transcription assistant.
INPUT: This file is a PDF that first contains the Question Paper and immediately after it the Markscheme.
TASK:
1. Transcribe EXACTLY all the questions FIRST (with their total marks).
2. After ALL questions, transcribe the Markscheme exactly, preserving M/A/R notation in brackets.
3. Always number the questions sequentially (Question 1, Question 2, Question 3, …) **in the order they appear in the PDF**, even if the PDF shows a different number or leaves it blank. Do NOT skip or leave Question: blank. Never start a question other than question 1 (even if it is labelled in pdf as 8 name it 1).
4. If a question or sub-question is labelled with a letter (e.g., "Q1.a", "Q2(b)", "1 (c)(i)"), transcribe it as "Question 1.a", "Question 2.b", "Question 1.c.i" etc., exactly preserving the hierarchy of sub-question identifiers.
5. After the markscheme, DETECT and FLAG all questions in the markscheme where a graph/diagram is expected. For each, output the question number and the page number in the format below.
FORMAT:
==== PAPER TOTAL MARKS ====
<total marks>
==== QUESTIONS BEGIN ====
Question 1.a
Total Marks: <number>
QP: <question text>
--QUESTION-END--
Question 1.b
Total Marks: <number>
QP: <question text>
--QUESTION-END--
Question 2
Total Marks: <number>
QP: <question text>
--QUESTION-END--
(repeat for all questions in order of appearance)
==== QUESTIONS END ====
==== MARKSCHEME BEGIN ====
Answer 1.a:
<exact MS for Q1.a with notations M1, A1, R1 etc>
Answer 1.b:
<exact MS for Q1.b with notations>
Answer 2 :
<exact MS for Q2 with notations>
(repeat for all answers)
==== MARKSCHEME END ====
==== GRAPH EXPECTED QUESTIONS ====
Graph expected in:
- Question <number> → Page <number>
(one per line)
==== END GRAPH EXPECTED ====
"""
    },
    # Grades the student transcript against the markscheme transcript.
    "GRADING_PROMPT": {
        "role": "system",
        "content": """You are an official examiner. Apply the following grading rules precisely and consistently.
### Mark Abbreviations:
- **M**: Method marks — awarded for correct mathematical procedures, approaches, or techniques
- **A**: Accuracy/Answer marks — awarded for correct final or intermediate answers
- **R**: Reasoning marks — awarded for justifications, explanations, or logical deductions
- **AG**: Answer Given — the answer is provided in the question; award no marks for simply stating it
- **FT**: Follow Through — marks awarded when a student correctly applies a method using their own previous (incorrect) answer
- **MR**: Misread — penalty applied when student misreads a value from the question (deduct from first applicable A-mark only, once per question)
---
## Grading Rules
### Core Principles:
1. **Award marks using official annotations** (e.g., M1, A2, R1).
2. **Do not award full marks for answers alone** — check that the required method steps are present.
3. **A-marks typically depend on M-marks** — an A-mark usually requires the corresponding M-mark to be earned first (unless the markscheme explicitly states otherwise).
4. **Accept equivalent forms** unless the markscheme specifies exact form (e.g., "simplified form only").
5. **Apply Follow Through (FT)** when a student uses an incorrect answer correctly in subsequent steps.
6. **Misread (MR) Penalty**: If a student misreads a numerical value from the question:
- Deduct from the **first applicable A-mark** in that question only
- Apply MR penalty **once per question** (not per sub-question)
- M-marks can still be awarded if the method is correct
- Annotate as: `A0 (MR applied)`
### Formatting Lost Marks:
- **Lost marks must be highlighted in red**: `<span style="color:red">M0</span>`, `<span style="color:red">A0</span>`, etc.
- **In the table**: Use red styling for "Awarded" column when mark is lost
- **Do use red** for markscheme expectations or student responses themselves when mark is lost
### Graph/Diagram Questions:
- When graph/diagram images are provided, describe visual evidence in the "Examiner Notes" column
- Examples: "Correct parabola shape, y-intercept matches", "Line has wrong gradient", "Asymptote missing"
---
## Output Format
Produce the following structure for each question/sub-question:
### Question <1.a>
**Markscheme vs Student Answer**
| Mark ID | Markscheme Expectation | Student's Response | Awarded | Examiner Notes |
|---------|------------------------|-------------------|---------|----------------|
| M1 | Use product rule: $u'v + uv'$ | Student wrote: $u'v + uv'$ ✓ | M1 | Correct method applied |
| A1 | Final answer: $2xe^x + e^x$ | Student answer: $2xe^x + e^x$ ✓ | A1 | Correct, depends on M1 |
**Total: X/Y**
---
*(Repeat for all questions)*
---
### Examiner's Summary Report
**IMPORTANT**: Group all sub-questions under their parent question. Sum the marks for all sub-parts (e.g., 1.a, 1.b, 1.c) and report as a single entry for Question 1.
**Format Rules for Summary Report**:
- If a question has sub-parts (1.a, 1.b, etc.), group them as "Question 1" with combined marks
- If a question has no sub-parts (just "Question 2"), report it directly
- Assign ONE overall remark per grouped question based on the predominant error type across all sub-parts
| Question Number | Marks | Remark | Feedback |
|-----------------|-------|--------|----------|
| 1 | 10/12 | A | Strong answer, only minor mistake |
| 2 | 5/8 | B | Good attempt, missing some detail |
| 3 | 7/10 | C | Adequate, but lacked depth/clarity |
| … | … | … | … (continue for all answers) |
...(repeat for all answers)
**Example Explanation**:
- Question 1 has sub-parts 1.a (3/5), 1.b (5/7), 1.c (2/0) → Total: (3+5+2)/(5+7+0) = 10/12
- Question 2 has sub-parts 2.a (2/3), 2.b (3/5) → Total: (2+3)/(3+5) = 5/8
- Question 3 has no sub-parts → Report as-is: 7/10
**Total: <obtained_marks>/<max_marks>**
---
## Remark Codes (assign ONE per grouped question):
- **A**: All Good — mostly full marks across sub-parts, no major errors
- **B**: Silly Mistake — minor arithmetic/algebraic slips (e.g., $2 + 3 = 6$, sign error in final step)
- **C**: Conceptual Error — wrong formula, incorrect method, fundamental misunderstanding in one or more sub-parts
- **D**: Hard Question — question is inherently difficult; partial credit reflects genuine attempt
- **E**: Not Applicable — question not attempted, or answer entirely illegible/missing
**Remark Selection for Grouped Questions**:
- If all sub-parts are correct → **A**
- If majority are correct with 1-2 arithmetic errors → **B**
- If one or more sub-parts show conceptual errors → **C**
- If question is difficult and student made reasonable attempt → **D**
- If all sub-parts are missing/illegible → **E**
---
## Additional Instructions:
- You will receive:
1. **QP+MS transcript** (authoritative source for question wording, total marks, and markscheme with M/A/R notation)
2. **AS transcript** (student answers in LaTeX-formatted markdown)
3. **Graph images** (if applicable) for questions involving diagrams
- Match student answers to question IDs from the QP+MS transcript.
- Grade according to the **verbatim markscheme**, but accept mathematically/conceptually equivalent answers (justify in "Examiner Notes").
- For graph questions, use provided images as visual context and describe what you observe.
- Ensure mark IDs in your grading table match those in the markscheme.
- Be consistent: if a student makes the same type of error multiple times, apply the same penalty logic each time.
"""
    }
}
# ---------------- HELPERS ----------------
def save_as_pdf(text, filename="output.pdf"):
    """Render markdown ``text`` as a single-section PDF and return its path.

    Args:
        text: Markdown source to render.
        filename: Destination path of the PDF.

    Returns:
        ``filename``, unchanged.
    """
    document = MarkdownPdf()
    body = Section(text, toc=False)
    document.add_section(body)
    document.save(filename)
    return filename
def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
    """Shrink a PDF with Ghostscript when it is larger than ``max_size`` bytes.

    Args:
        input_path: Path of the PDF to check.
        output_path: Where to write the compressed copy. Defaults to
            ``<input stem>_compressed<ext>`` next to the input.
        max_size: Size threshold in bytes (default 20 MiB).

    Returns:
        ``output_path`` when a compressed copy was written and fits within
        ``max_size``; otherwise ``input_path`` (file missing or unreadable,
        already small enough, Ghostscript missing or failed, or the result
        is still too large).
    """
    if output_path is None:
        base, ext = os.path.splitext(input_path)
        output_path = f"{base}_compressed{ext}"

    try:
        size = os.path.getsize(input_path)
    except OSError:
        # Missing/unreadable input: nothing to compress, hand the path back.
        return input_path

    if size <= max_size:
        print(f"ℹ️ Not compressing {input_path} ({size/1024/1024:.2f} MB <= {max_size/1024/1024} MB)")
        return input_path

    print(f"🔄 Compressing {input_path} ({size/1024/1024:.2f} MB) -> {output_path}")
    gs_cmd = [
        "gs", "-sDEVICE=pdfwrite",
        "-dCompatibilityLevel=1.4",
        "-dPDFSETTINGS=/ebook",
        "-dNOPAUSE", "-dQUIET", "-dBATCH",
        f"-sOutputFile={output_path}", input_path,
    ]
    try:
        # Argument list (no shell), so paths are never shell-interpreted.
        subprocess.run(gs_cmd, check=True)
        new_size = os.path.getsize(output_path)
    except (OSError, subprocess.SubprocessError) as e:
        # OSError covers a missing "gs" binary or missing output file;
        # SubprocessError covers a non-zero Ghostscript exit status.
        print("❌ Compression error:", e)
        return input_path

    print(f"✅ Compression done. New size: {new_size/1024/1024:.2f} MB")
    if new_size <= max_size:
        return output_path
    print("⚠️ Compressed file still larger than threshold; returning original")
    return input_path
def upload_to_gemini(path, display_name=None, poll_interval=2, max_wait=600):
    """Upload a file to Gemini with the google-genai SDK and wait until it is ready.

    Args:
        path: Local path of the file to upload.
        display_name: Optional display name; forwarded through the upload
            config only when given.
        poll_interval: Seconds to sleep between state checks.
        max_wait: Maximum number of seconds to wait while the file is in the
            ``PROCESSING`` state.

    Returns:
        The uploaded file object returned by the SDK.

    Raises:
        TimeoutError: The file was still ``PROCESSING`` after ``max_wait`` seconds.
        RuntimeError: Gemini reported the file state as ``FAILED``.
        Exception: Any SDK error is logged and re-raised unchanged.
    """
    print(f"📤 Uploading {path} to Gemini...")
    try:
        upload_kwargs = {"file": path}
        if display_name:
            upload_kwargs["config"] = {"display_name": display_name}
        uploaded_file = client.files.upload(**upload_kwargs)

        # Poll until processing completes, but never wait forever.
        print(f"⏳ Waiting for file processing: {uploaded_file.name}")
        deadline = time.monotonic() + max_wait
        while uploaded_file.state.name == "PROCESSING":
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"File still processing after {max_wait}s: {uploaded_file.name}"
                )
            time.sleep(poll_interval)
            uploaded_file = client.files.get(name=uploaded_file.name)

        if uploaded_file.state.name == "FAILED":
            raise RuntimeError(f"File processing failed: {uploaded_file.name}")

        print(f"✅ Uploaded and processed: {uploaded_file.name}")
        return uploaded_file
    except Exception as e:
        print(f"❌ Upload failed for {path}: {e}")
        raise
def merge_pdfs(paths, output_path):
    """Concatenate every page of the PDFs in ``paths`` into one file.

    Pages are appended in the order the paths are given. Returns
    ``output_path``.
    """
    merged = PdfWriter()
    for source_path in paths:
        source = PdfReader(source_path)
        for source_page in source.pages:
            merged.add_page(source_page)
    with open(output_path, "wb") as out_file:
        merged.write(out_file)
    return output_path
def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, model_name="gemini-2.5-pro"):
    """Send a prompt, plus optional file and image parts, to Gemini and return the text.

    Args:
        prompt_text: Prompt string; always the first content part.
        file_upload_obj: Optional uploaded-file object appended after the prompt.
        image_obj: Optional image or list of images. String items are treated
            as paths and opened with PIL; other items are passed through as-is.
        model_name: Model tried first. If it fails, ``gemini-2.5-flash`` is
            tried once.

    Returns:
        The response text.

    Raises:
        Exception: Whatever the fallback attempt raised when both attempts
            fail. A response without text counts as a failure (RuntimeError).
    """
    fallback_model = "gemini-2.5-flash"
    opened_images = []  # images opened here, so they can be closed afterwards

    def _generate(model, contents):
        response = client.models.generate_content(model=model, contents=contents)
        raw_text = response.text
        if raw_text is None:
            # Make the failure explicit instead of a TypeError from len(None).
            raise RuntimeError(f"Model {model} returned no text")
        print("📥 Received response (chars):", len(raw_text))
        return raw_text

    try:
        contents = [prompt_text]
        if file_upload_obj:
            contents.append(file_upload_obj)
        if image_obj:
            items = image_obj if isinstance(image_obj, list) else [image_obj]
            for item in items:
                if isinstance(item, str):
                    item = Image.open(item)
                    opened_images.append(item)
                contents.append(item)

        print("📡 Sending request to Gemini (prompt length:", len(prompt_text), "chars )")
        try:
            return _generate(model_name, contents)
        except Exception as e:
            print(f"❌ Generation failed: {e}")
            print(f"⚡ Trying fallback model: {fallback_model}")
            try:
                return _generate(fallback_model, contents)
            except Exception as e2:
                print(f"❌ Fallback also failed: {e2}")
                raise
    finally:
        # The request is synchronous, so the image handles are no longer needed.
        for img in opened_images:
            img.close()
# ---------------- PARSERS ----------------
def extract_question_ids_from_qpms(text: str):
    """Extract question IDs from the QP+MS transcript.

    Lines of the form ``Question <id>`` / ``Question: <id>`` are preferred.
    When none exist, leading list numbers such as ``1)``, ``2.`` or ``3(a)``
    at the start of a line are used instead.

    Args:
        text: Transcript text returned by the model.

    Returns:
        List of ID strings in order of appearance (duplicates are kept);
        empty when nothing matches.
    """
    print("🔎 Extracting question IDs from QP+MS transcript using regex...")
    # Normalise non-breaking spaces and tabs so the patterns see plain spaces.
    clean_text = text.replace("\u00A0", " ").replace("\t", " ")

    primary_matches = re.findall(
        r"^\s*Question\s*[:\s]\s*([\dA-Za-z.()]+)", clean_text, re.MULTILINE
    )
    if primary_matches:
        print(f"✅ Extracted {len(primary_matches)} question IDs from explicit 'Question X' lines.")
        print("IDs:", primary_matches)
        return primary_matches

    fallback_matches = re.findall(
        r"^\s*(\d+(?:[.)]|\([a-zA-Z0-9]+\))?[a-zA-Z0-9]*)", clean_text, re.MULTILINE
    )
    if fallback_matches:
        print(f"✅ Extracted {len(fallback_matches)} question IDs (fallback numbered lists).")
        print("IDs:", fallback_matches)
    else:
        print("⚠️ No question IDs extracted; will send NA placeholder.")
    return fallback_matches
# def build_as_cot_prompt_with_expected_ids(expected_ids, qpms_text=None):
# """
# Construct the AS transcription prompt injecting the expected IDs block and graph detection instructions,
# modifying it to include a Chain-of-Thought (CoT) section using a <think> tag, and
# requiring mathematical expressions to be enclosed in LaTeX dollar delimiters ($...$).
# """
# if not expected_ids:
# ids_block = "{NA}"
# else:
# ids_block = "{\n" + "\n".join(expected_ids) + "\n}"
# qpms_guidance = ""
# if qpms_text:
# qpms_guidance = (
# "\nYou are also provided with the full transcript of the Question Paper and Markscheme (QP+MS). "
# "Use this transcript primarily to resolve **ambiguous handwriting** (e.g., if a number could be '$-1.6$' or '$1.6$'). "
# "If you are confident in your transcription without referring to the QP+MS, use your judgment. "
# "**Always prioritize accuracy and context from the QP+MS transcript when in doubt about a specific ambiguous character or expression.**\n"
# )
# prompt = f"""You are a high-quality handwritten transcription assistant, performing transcription with a Chain-of-Thought process.
# INPUT: This PDF contains a student's handwritten answer sheet.
# {qpms_guidance}
# TASK:
# 1. **THINKING:** Before transcribing each answer, you must document your thought process using the **<think>** tag.
# - Identify the question ID. If inferred, note why.
# - Detail any ambiguities encountered (e.g., unclear numbers, symbols, or structure).
# - Explain how you resolved ambiguities, specifically if you referred to the QP+MS transcript.
# - If you *did* refer to the QP+MS but decided to keep your original transcription, state this clearly.
# - If you initially label an answer as 2.a but later realize it aligns better with 2.b based on the marking scheme, you should reassign it to 2.b and briefly explain your reasoning in the <think> tag to maintain clarity and consistency.
# *Example Thinking:*
# <think>
# - Found Question 3(a).
# - Noticed '2x' was written ambiguously; it could be '2x' or '21x'.
# - Referred to QP+MS: The expected answer involves '$21x$'.
# - Re-examined the handwriting carefully: The student's handwriting strongly appears to be '$2x$' and not '$21x$'.
# - DECISION: Transcribe exactly what the student wrote: '$2x$'.
# </think>
# *Example Thinking 2 (Ambiguity Resolved by MS):*
# <think>
# - Found Question INFERRED: 1(b) based on proximity to 1(a).
# - Noticed the final answer looked like '3.6', but the decimal point was very faint and could be '36'.
# - Referred to QP+MS: Expected answer is '$3.8$'. Re-examined the student's writing: it appears to be a poorly written '$3.8$' which I initially misread as '$3.6$'.
# - DECISION: Corrected my transcription to '$3.8$' based on re-evaluation and MS context.
# </think>
# 2. **TRANSCRIPTION:** Transcribe the student's answers with accordance to the markcheme provided. Preserve step order and line breaks.
# - Attempt to assign each answer to a question ID if the student has labelled it (e.g., "1", "1a", "2(b)", "3").
# - If the student hasn't labelled answers, segment contiguous answer blocks and attempt to infer question IDs from context β but mark inferred IDs clearly as "**INFERRED: <id>**".
# - **Enclose all mathematical expressions and single variables in LaTeX dollar delimiters ($...$).**
# - *Example:* "The area is $A = \pi r^2$ so $3x+5 = 11$ thus $x=2$."
# - If a diagram/graph is omitted, write **[Graph omitted]**.
# - Unreadable parts: **[illegible]**.
# - Unanswered: **[No response]**.
# - Do NOT recreate diagrams.
# Ensure consistency and determinism in formatting so subsequent models can grade directly from this aligned format.
# Expected questions (if missing, write NA):
# {ids_block}
# -----------------------
# OUTPUT FORMAT:
# <think>...</think>
# Question <id>
# AS:<transcribed answer or placeholder>
# <think>...</think>
# Question <id>
# AS:<transcribed answer or placeholder>
# ...
# ==== GRAPH FOUND ANSWERS ====
# Graph found in:
# - Answer <number> β Page <number>
# (one per line)
# ==== END GRAPH FOUND ===="""
# return prompt
def build_as_cot_prompt_with_expected_ids(expected_ids, qpms_text=None):
    """Build the answer-sheet (AS) transcription prompt.

    The prompt asks for a ``<think>`` section before each transcribed answer,
    LaTeX ``$...$`` delimiters around mathematics, and a trailing
    ``==== GRAPH FOUND ANSWERS ====`` section.

    Args:
        expected_ids: Question IDs the model should expect. Empty or ``None``
            yields the ``{NA}`` placeholder. Items are converted with ``str``.
        qpms_text: Optional QP+MS transcript. When not ``None`` it is embedded
            in full (only leading/trailing whitespace is stripped).

    Returns:
        The prompt string.
    """
    if not expected_ids:
        ids_block = "{NA}"
    else:
        ids_block = "{\n" + "\n".join(str(q_id) for q_id in expected_ids) + "\n}"

    qpms_section = ""
    if qpms_text is not None:
        qpms_section = (
            "\nYou are also provided with the full transcript of the Question Paper and Markscheme (QP+MS) below."
            "\nUse it primarily to resolve ambiguous handwriting and to confirm expected answers when needed."
            "\n--- BEGIN QP+MS TRANSCRIPT ---\n"
            f"{qpms_text.strip()}\n"
            "--- END QP+MS TRANSCRIPT ---\n"
        )

    # "\\pi" is escaped on purpose: a bare "\p" is an invalid escape sequence
    # in a non-raw string literal and triggers a warning on current Python.
    prompt = f"""You are a high-quality handwritten transcription assistant, performing transcription with a Chain-of-Thought process.
INPUT: This PDF contains a student's handwritten answer sheet.
{qpms_section}
TASK:
1. **THINKING:** Before transcribing each answer, you must document your thought process using the **<think>** tag.
- Identify the question ID. If inferred, note why.
- Detail any ambiguities encountered (e.g., unclear numbers, symbols, or structure).
- Explain how you resolved ambiguities, specifically if you referred to the QP+MS transcript.
- If you *did* refer to QP+MS but decided to keep your original transcription, state this clearly.
- If you initially label an answer as 2.a but later realize it aligns better with 2.b based on the marking scheme, reassign it to 2.b and briefly explain your reasoning in the <think> tag.
*Example Thinking:*
<think>
- Found Question 3(a).
- Noticed '2x' was written ambiguously; it could be '2x' or '21x'.
- Referred to QP+MS: The expected answer involves '$21x$'.
- Re-examined the handwriting carefully: The student's handwriting strongly appears to be '$2x$' and not '$21x$'.
- DECISION: Transcribe exactly what the student wrote: '$2x$'.
</think>
2. **TRANSCRIPTION:** Transcribe the student's answers in accordance with the markscheme provided. Preserve step order and line breaks.
- Attempt to assign each answer to a question ID if the student has labelled it (e.g., "1", "1a", "2(b)", "3").
- If the student hasn't labelled answers, segment contiguous answer blocks and attempt to infer question IDs from context — mark inferred IDs clearly as "**INFERRED: <id>**".
- **Enclose all mathematical expressions and single variables in LaTeX dollar delimiters ($...$).**
- Example: "The area is $A = \\pi r^2$ so $3x+5 = 11$ thus $x=2$."
- If a diagram/graph is omitted, write **[Graph omitted]**.
- Unreadable parts: **[illegible]**.
- Unanswered: **[No response]**.
- Do NOT recreate diagrams.
Ensure consistency and determinism in formatting so subsequent models can grade directly from this aligned format.
Expected questions (if missing, write NA):
{ids_block}
-----------------------
OUTPUT FORMAT:
<think>...</think>
Question <id>
AS:<transcribed answer or placeholder>
<think>...</think>
Question <id>
AS:<transcribed answer or placeholder>
...
==== GRAPH FOUND ANSWERS ====
Graph found in:
- Answer <number> → Page <number>
(one per line)
==== END GRAPH FOUND ===="""
    return prompt
def extract_graph_questions_from_ms(text: str):
    """Extract graph questions and page numbers from the MS transcript.

    Reads the section between ``==== GRAPH EXPECTED QUESTIONS ====`` and
    ``==== END GRAPH EXPECTED ====`` and parses lines shaped like
    ``- Question <id> <sep> Page <n>``.

    The separator is matched loosely (up to three non-space characters that
    are not ASCII letters or digits, or nothing at all), so an arrow, an
    em/en dash, ``-``, ``->``, ``:`` or a mis-encoded dash are all accepted
    rather than one hard-coded character.

    Args:
        text: Transcript text returned by the model.

    Returns:
        Dict mapping question ID (str) to page number (int); empty when the
        section is missing or has no parsable lines.
    """
    clean_text = text.replace("\u00A0", " ").replace("\t", " ")
    section = re.search(
        r"==== GRAPH EXPECTED QUESTIONS ====\s*(.*?)\s*==== END GRAPH EXPECTED ====",
        clean_text,
        re.S,
    )
    if not section:
        return {}

    entry_re = re.compile(
        r"- Question\s+([\dA-Za-z.()]+)\s*(?:[^\sA-Za-z0-9]{1,3})?\s*Page\s*(\d+)"
    )
    graph_dict = {}
    for line in section.group(1).splitlines():
        entry = entry_re.match(line.strip())
        if entry:
            q_id, page = entry.groups()
            graph_dict[q_id] = int(page)
    return graph_dict
def extract_graph_answers_from_as(text: str):
    """Extract graph answers and page numbers from the AS transcript.

    Reads the section between ``==== GRAPH FOUND ANSWERS ====`` and
    ``==== END GRAPH FOUND ====`` and parses lines shaped like
    ``- Answer <id> <sep> Page <n>``.

    The separator is matched loosely (up to three non-space characters that
    are not ASCII letters or digits, or nothing at all), so an arrow, an
    em/en dash, ``-``, ``->``, ``:`` or a mis-encoded dash are all accepted
    rather than one hard-coded character.

    Args:
        text: Transcript text returned by the model.

    Returns:
        Dict mapping answer ID (str) to page number (int); empty when the
        section is missing or has no parsable lines.
    """
    clean_text = text.replace("\u00A0", " ").replace("\t", " ")
    section = re.search(
        r"==== GRAPH FOUND ANSWERS ====\s*(.*?)\s*==== END GRAPH FOUND ====",
        clean_text,
        re.S,
    )
    if not section:
        return {}

    entry_re = re.compile(
        r"- Answer\s+([\dA-Za-z.()]+)\s*(?:[^\sA-Za-z0-9]{1,3})?\s*Page\s*(\d+)"
    )
    graph_dict = {}
    for line in section.group(1).splitlines():
        entry = entry_re.match(line.strip())
        if entry:
            ans_id, page = entry.groups()
            graph_dict[ans_id] = int(page)
    return graph_dict
def _awarded_marks_in_block(block, mark_re):
    """Collect mark codes from the "Awarded" column of the tables in ``block``.

    Falls back to scanning the whole block when no table with an "Awarded"
    header is present.
    """
    marks = []
    found_table = False
    awarded_idx = None  # column index of "Awarded" while inside such a table
    n_cols = 0
    for raw_line in block.splitlines():
        line = raw_line.strip()
        if not line.startswith("|"):
            awarded_idx = None  # any non-table line ends the current table
            continue
        cells = [cell.strip() for cell in line.strip("|").split("|")]
        if awarded_idx is None:
            lowered = [cell.lower() for cell in cells]
            if "awarded" in lowered:
                awarded_idx = lowered.index("awarded")
                n_cols = len(cells)
                found_table = True
            continue
        if all(set(cell) <= set("-: ") for cell in cells):
            continue  # markdown header/body separator row
        if len(cells) == n_cols:
            marks.extend(mark_re.findall(cells[awarded_idx]))
        else:
            # A stray "|" (e.g. inside LaTeX) shifted the columns; scan the row.
            marks.extend(mark_re.findall(line))
    if not found_table:
        return mark_re.findall(block)
    return marks


def extract_marks_from_grading(grading_text):
    """Parse the grading markdown and extract the marks awarded per question.

    The text is split on ``## Question`` headings. For each block the question
    ID is taken from the first line, and mark codes (``M1``, ``A0``, ``R1``,
    ...) are read from the "Awarded" column of the grading table. Reading only
    that column keeps the "Mark ID" column and mark codes mentioned in the
    examiner notes from being counted as awarded marks. Blocks without such a
    table are scanned in full.

    Args:
        grading_text: Markdown produced by the grading prompt.

    Returns:
        ``{"grading": [{"question": <id>, "marks_awarded": [<code>, ...]}, ...]}``
    """
    print("🔎 Extracting awarded marks from grading output...")
    mark_re = re.compile(r"\b([MAR]\d+)\b")
    id_re = re.compile(r"([0-9]+(?:[a-zA-Z]|\([^)]+\)|(?:\.[a-zA-Z0-9]+))*)")

    grading_json = {"grading": []}
    question_blocks = re.split(r"##\s*Question\s+", grading_text)
    for block in question_blocks[1:]:
        lines = block.strip().splitlines()
        first_line = lines[0].strip() if lines else ""
        q_id_match = id_re.match(first_line)
        if q_id_match:
            q_id = q_id_match.group(1).strip()
        else:
            q_id = first_line.split()[0] if first_line else ""
        grading_json["grading"].append({
            "question": q_id,
            "marks_awarded": _awarded_marks_in_block(block, mark_re),
        })

    print("✅ Extracted grading marks for", len(grading_json["grading"]), "question blocks.")
    print(json.dumps(grading_json, indent=2))
    return grading_json
# ---------------- MAPPING/IMPRINT HELPERS ----------------
def ask_gemini_for_mapping_batch(image_paths, grading_json, expected_ids=None, rows=GRID_ROWS, cols=GRID_COLS):
    """Ask Gemini where each graded question starts on a batch of grid-numbered pages.

    Args:
        image_paths: Paths of the grid-annotated page images, in page order.
        grading_json: Grading structure embedded in the prompt as JSON.
        expected_ids: Optional question IDs listed in the prompt as a hint.
        rows: Number of grid rows mentioned in the prompt.
        cols: Number of grid columns mentioned in the prompt.

    Returns:
        List of dicts such as ``{"page": 1, "question": "1(a)", "cell_number": 15}``
        parsed from the model reply; ``[]`` when no JSON array of objects can
        be extracted. Page numbers are whatever the model reports for the
        images it was sent.

    Raises:
        Exception: Propagated from the fallback model call when both model
            calls fail.
    """
    ids_block = "{NA}"
    if expected_ids:
        ids_block = "{\n" + "\n".join(expected_ids) + "\n}"
    prompt = f"""You are an exam marker. Your role is to identify where each question begins on each page.
The pages are divided into a {rows} x {cols} grid. Each cell has a RUNNING NUMBER label.
For each question in the grading JSON, return the cell NUMBER where the FIRST STEP of that question begins.
❗ IMPORTANT RULES:
- Do not place marks inside another question's answer area.
- Prefer placing the marks in a BLANK cell immediately to the RIGHT of the answer step. If no blank cell is available to the right, then place in a blank cell to the LEFT.
- Never place marks above or below the answer.
- Each question should have unique cell number
- If a question serial number is visible in the answer image, you must mandatorily identify the corresponding question using the grading JSON.
IMPORTANT: For your help i have provided u questions that u can expect in the images:
{ids_block}
Return JSON only, like:
[{{"page": 1, "question": "1(a)", "cell_number": 15}}, ...]
Grading JSON:
{json.dumps(grading_json, indent=2)}"""

    images = []
    try:
        for path in image_paths:
            images.append(Image.open(path))
        contents = [prompt] + images
        print(f"📡 Sending batch mapping request for {len(image_paths)} pages to Gemini...")
        try:
            response = client.models.generate_content(
                model="gemini-2.0-flash-exp",
                contents=contents,
            )
            raw_text = response.text
        except Exception as e:
            # Was a bare ``except:``; Exception leaves Ctrl-C / SystemExit alone.
            print("⚠️ Trying fallback model for mapping...")
            print("Primary mapping model failed:", e)
            response = client.models.generate_content(
                model="gemini-1.5-flash",
                contents=contents,
            )
            raw_text = response.text
    finally:
        # The model calls are synchronous; release the image file handles.
        for img in images:
            img.close()

    raw_text = raw_text or ""  # a reply without text would break len() below
    print("📥 Batch mapping response (chars):", len(raw_text))
    print("📝 Gemini raw batch output:")
    print(raw_text)

    # Take everything from the first "[" to the last "]" as the JSON array.
    match = re.search(r'(\[.*\])', raw_text, re.DOTALL)
    if not match:
        print("❌ Failed to find JSON array in response")
        return []
    try:
        mapping = json.loads(match.group(1))
    except Exception as e:
        print(f"❌ Failed to parse Gemini JSON mapping: {e}")
        return []
    if not isinstance(mapping, list):
        print("❌ Failed to find JSON array in response")
        return []
    # Callers use dict access on every entry, so drop anything else.
    mapping = [entry for entry in mapping if isinstance(entry, dict)]
    print(f"✅ Parsed Gemini batch mapping for {len(image_paths)} pages")
    return mapping
def _coerce_int(value):
    """Return ``value`` as an int, or ``None`` when it is not a whole number."""
    try:
        return int(str(value).strip())
    except (TypeError, ValueError):
        return None


def imprint_marks_using_mapping(pdf_path, grading_json, output_pdf, expected_ids=None, rows=GRID_ROWS, cols=GRID_COLS):
    """Stamp awarded marks onto the answer PDF next to each question.

    Steps:
      1. Render each PDF page and draw a running cell number at the centre of
         every cell of a ``rows`` x ``cols`` grid (saved as ``page_<n>_grid.png``).
      2. Send the grid images to Gemini in batches of 10 to get, per question,
         the page and cell where its answer starts.
      3. Write the awarded marks in red at that cell on a clean copy of the
         page (``annotated_page_<n>.png``) and assemble the pages into
         ``output_pdf``.

    Args:
        pdf_path: Path of the student's answer PDF.
        grading_json: ``{"grading": [{"question": ..., "marks_awarded": [...]}, ...]}``.
        output_pdf: Destination path of the imprinted PDF.
        expected_ids: Optional question IDs forwarded to the mapping prompt.
        rows: Number of grid rows.
        cols: Number of grid columns.

    Returns:
        Path returned by ``compress_pdf`` for the imprinted PDF.
    """
    print("🔄 Converting answer PDF to images for imprinting...")
    pages = convert_from_path(pdf_path, dpi=200)

    # The label font does not depend on the page, so resolve it once.
    try:
        num_font = ImageFont.truetype("arial.ttf", 20)
    except Exception:
        num_font = ImageFont.load_default()

    temp_grid_images = []
    for p_index, page in enumerate(pages):
        img = page.convert("RGB")
        w, h = img.size
        cell_w, cell_h = w / cols, h / rows
        draw = ImageDraw.Draw(img)
        for cell_num in range(1, rows * cols + 1):
            r, c = divmod(cell_num - 1, cols)
            x = int(c * cell_w + cell_w / 2)
            y = int(r * cell_h + cell_h / 2)
            text = str(cell_num)
            bbox = draw.textbbox((0, 0), text, font=num_font)
            tw = bbox[2] - bbox[0]
            th = bbox[3] - bbox[1]
            # Centre the label inside its cell.
            draw.text((x - tw / 2, y - th / 2), text, fill="black", font=num_font)
        temp_path = f"page_{p_index+1}_grid.png"
        img.save(temp_path, "PNG")
        temp_grid_images.append(temp_path)
        print("🔰 Created grid image:", temp_path)

    print("📡 Sending page images to Gemini in batches for mapping...")
    batch_size = 10
    all_mappings = []
    for start in range(0, len(temp_grid_images), batch_size):
        batch_paths = temp_grid_images[start:start+batch_size]
        batch_mapping = ask_gemini_for_mapping_batch(batch_paths, grading_json, expected_ids, rows, cols)
        for entry in batch_mapping:
            local_page = _coerce_int(entry.get("page"))
            if local_page is None:
                continue
            # The model only sees this batch, so it numbers pages from 1;
            # shift by the batch offset to get the page number in the PDF.
            all_mappings.append({**entry, "page": start + local_page})
        print(f"✅ Processed batch {start//batch_size + 1}: pages {start+1}-{start+len(batch_paths)}")

    # Marks lookup by question id: exact first, then case-insensitive.
    # ``setdefault`` keeps the first entry when ids repeat.
    exact_marks = {}
    folded_marks = {}
    for g in grading_json.get("grading", []):
        exact_marks.setdefault(g["question"], g["marks_awarded"])
        folded_marks.setdefault(g["question"].lower(), g["marks_awarded"])

    print("📝 Annotating pages with marks...")
    annotated_page_paths = []
    max_cell = rows * cols
    for p_index, page in enumerate(pages):
        page_num = p_index + 1
        img_cv = cv2.cvtColor(np.array(page.convert("RGB")), cv2.COLOR_RGB2BGR)
        h, w, _ = img_cv.shape
        cell_w_px, cell_h_px = w / cols, h / rows
        for item in (m for m in all_mappings if m.get("page") == page_num):
            qid = item.get("question")
            if qid is None or item.get("cell_number") is None:
                continue
            cell_number = _coerce_int(item.get("cell_number"))
            if cell_number is None or not 1 <= cell_number <= max_cell:
                # Unusable cell from the model; skip rather than draw off-page.
                print(f"⚠️ Skipping question {qid} on page {page_num}: invalid cell {item.get('cell_number')!r}")
                continue
            qid = str(qid)
            marks_list = exact_marks.get(qid) or folded_marks.get(qid.lower()) or []
            marks_text = ",".join(marks_list) if marks_list else "?"
            row, col = divmod(cell_number - 1, cols)
            # Anchor the text in the right-hand quarter of the cell, mid-height.
            x_c = int((col + 1) * cell_w_px - cell_w_px / 4)
            y_c = int((row + 0.5) * cell_h_px)
            font_scale = max(1.0, min(2.0, cell_h_px / 40.0))
            thickness = max(2, int(font_scale * 2))
            cv2.putText(img_cv, marks_text, (x_c, y_c), cv2.FONT_HERSHEY_SIMPLEX,
                        font_scale, (0, 0, 255), thickness, cv2.LINE_AA)
            print(f"📝 Marks annotated for page {page_num}, question {qid}: {marks_text}")
        annotated_path = f"annotated_page_{page_num}.png"
        cv2.imwrite(annotated_path, img_cv)
        annotated_page_paths.append(annotated_path)
        print("✅ Annotated page saved:", annotated_path)

    print("📑 Merging annotated pages into final PDF...")
    with open(output_pdf, "wb") as f:
        f.write(img2pdf.convert(annotated_page_paths))
    compressed = compress_pdf(output_pdf)
    print("🎉 Imprinted PDF saved to:", compressed)
    return compressed
def extract_pdf_pages_as_images(pdf_path, page_numbers, prefix):
    """
    Render selected pages of a PDF to PNG files.

    Args:
        pdf_path: Path of the PDF to render.
        page_numbers: Iterable of 1-based page numbers; duplicates are ignored.
            ``None`` or an empty iterable yields no output.
        prefix: File-name prefix; each page is written as
            ``{prefix}_page_{page_num}.png`` in the current directory.

    Returns:
        List of PNG paths that were written, in ascending page order.
    """
    unique_pages = sorted(set(page_numbers or ()))
    if not unique_pages:
        # Nothing requested: avoid min()/max() on an empty sequence.
        return []

    first_page = unique_pages[0]
    last_page = unique_pages[-1]
    # One conversion call covers the whole first..last span; the requested
    # pages are then picked out of the result by their offset from first_page.
    images = convert_from_path(pdf_path, dpi=200, first_page=first_page, last_page=last_page)

    out_paths = []
    for page_num in unique_pages:
        img_idx = page_num - first_page
        if img_idx >= len(images):
            # Fewer pages came back than the span asked for, so this page
            # number is not available; skip it instead of raising IndexError.
            print(f"Skipping page {page_num}: not available in {pdf_path}")
            continue
        out_path = f"{prefix}_page_{page_num}.png"
        images[img_idx].save(out_path, "PNG")
        print(f"π€ Extracted graph page {page_num} from {pdf_path} as {out_path}")
        out_paths.append(out_path)
    return out_paths
# ---------------- PIPELINE ----------------
def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
    """
    Run the full grading pipeline on a question paper, markscheme and answer sheet.

    Steps, in order:
      1. Compress the three PDFs and merge QP + MS into one PDF.
      2. Upload the merged PDF and the answer sheet to Gemini.
      3. Transcribe QP+MS, asking for a trailing list of graph-expected questions.
      4. Transcribe the answer sheet, asking for a trailing list of graph answers.
      5. Grade both transcripts, attaching any extracted graph page images.
      6. Save the grading as a PDF and extract the marks as JSON.
      7. Optionally imprint the marks onto the answer sheet.

    Debug copies of the transcripts, the grading text and the marks JSON are
    written to the current working directory.

    Args:
        qp_path: Path to the question paper PDF.
        ms_path: Path to the markscheme PDF.
        ans_path: Path to the student answer sheet PDF.
        imprint: When true, also produce an imprinted copy of the answer sheet.

    Returns:
        Tuple ``(qpms_text, as_text, grading_text, grading_pdf_path,
        imprinted_pdf_path)``. ``imprinted_pdf_path`` is ``None`` unless
        ``imprint`` is true. If any step raises, the error is printed with its
        traceback and ``(error_message, None, None, None, None)`` is returned.
    """
    try:
        print("π Starting pipeline...")
        qp_path = compress_pdf(qp_path)
        ms_path = compress_pdf(ms_path)
        ans_path = compress_pdf(ans_path)

        merged_qpms_path = os.path.splitext(qp_path)[0] + "_merged_qp_ms.pdf"
        merge_pdfs([qp_path, ms_path], merged_qpms_path)
        print("π Merged QP + MS ->", merged_qpms_path)

        print("πΌ Uploading files to Gemini...")
        merged_uploaded = upload_to_gemini(merged_qpms_path)
        ans_uploaded = upload_to_gemini(ans_path)
        print("✅ Upload complete.")

        # --- Step 1.i: QP + MS transcription, with graph-expected listing ---
        print("1.i) Transcribing QP+MS (questions first, then full markscheme, with graph detection)...")
        qpms_prompt = PROMPTS["QP_MS_TRANSCRIPTION"]["content"] + "\nAt the end, also list all questions in the markscheme where a graph is expected, in the format:\nGraph expected in:\n- Question <number> β Page <number>\n(One per line, after ==== MARKSCHEME END ====)"
        qpms_text = gemini_generate_content(qpms_prompt, file_upload_obj=merged_uploaded)
        print("π QP+MS transcription received. Saving debug file: debug_qpms_transcript.txt")
        with open("debug_qpms_transcript.txt", "w", encoding="utf-8") as f:
            f.write(qpms_text)

        ms_graph_mapping = extract_graph_questions_from_ms(qpms_text)
        print("πΌοΈ Graph-expected questions in MS:", ms_graph_mapping)
        ms_graph_pages = list(ms_graph_mapping.values())
        ms_graph_images = []
        if ms_graph_pages:
            ms_graph_images = extract_pdf_pages_as_images(merged_qpms_path, ms_graph_pages, prefix="qpms_graph")

        extracted_ids = extract_question_ids_from_qpms(qpms_text)
        if not extracted_ids:
            # Placeholder so the answer-sheet prompt still receives an ID list.
            extracted_ids = ["NA"]

        # --- Step 1.ii: answer-sheet transcription, with graph-found listing ---
        print("1.ii) Building AS transcription prompt with expected question IDs and graph detection, sending to Gemini...")
        as_prompt = build_as_cot_prompt_with_expected_ids(extracted_ids, qpms_text) + "\nAt the end, also list all answers where a graph is found, in the format:\nGraph found in:\n- Answer <number> β Page <number>\n(One per line, after all answers)"
        as_text = gemini_generate_content(as_prompt, file_upload_obj=ans_uploaded)
        print("π AS transcription received. Saving debug file: debug_as_transcript.txt")
        with open("debug_as_transcript.txt", "w", encoding="utf-8") as f:
            f.write(as_text)

        as_graph_mapping = extract_graph_answers_from_as(as_text)
        print("πΌοΈ Graph-attempted answers in AS:", as_graph_mapping)
        as_graph_pages = list(as_graph_mapping.values())
        as_graph_images = []
        if as_graph_pages:
            as_graph_images = extract_pdf_pages_as_images(ans_path, as_graph_pages, prefix="as_graph")

        # --- Step 2: grading ---
        print("2) Preparing grading input and sending to Gemini for grading...")
        grading_input = (
            "=== QP+MS TRANSCRIPT BEGIN ===\n"
            + qpms_text
            + "\n=== QP+MS TRANSCRIPT END ===\n\n"
            + "=== ANSWER SHEET TRANSCRIPT BEGIN ===\n"
            + as_text
            + "\n=== ANSWER SHEET TRANSCRIPT END ===\n"
        )
        if ms_graph_images or as_graph_images:
            graph_note = "\n\n---\nSome questions require graphs. I've attached the relevant graph pages from QP+MS and from the Answer Sheet. Use them as visual context when grading.\n---\n"
            grading_input += graph_note

        grading_prompt_system = PROMPTS["GRADING_PROMPT"]["content"]
        grading_images = ms_graph_images + as_graph_images
        grading_text = gemini_generate_content(
            grading_prompt_system + "\n\nPlease grade the following transcripts:\n" + grading_input,
            # Pass None rather than an empty list when there are no graph pages.
            image_obj=grading_images if grading_images else None,
        )
        print("π§Ύ Grading output received. Saving debug file: debug_grading.md")
        with open("debug_grading.md", "w", encoding="utf-8") as f:
            f.write(grading_text)

        # Output names are derived from the (compressed) answer-sheet file name.
        base_name = os.path.splitext(os.path.basename(ans_path))[0]
        grading_pdf_path = save_as_pdf(grading_text, f"{base_name}_graded.pdf")
        print("π Grading PDF saved:", grading_pdf_path)

        grading_json = extract_marks_from_grading(grading_text)
        with open("debug_grading_json.json", "w", encoding="utf-8") as f:
            json.dump(grading_json, f, indent=2, ensure_ascii=False)
        print("π§  Grading marks extraction complete.")

        # --- Step 3 (optional): imprint marks on the answer sheet ---
        imprinted_pdf_path = None
        if imprint:
            print("β Imprint option enabled. Starting imprinting process...")
            imprinted_pdf_path = f"{base_name}_imprinted.pdf"
            imprinted_pdf_path = imprint_marks_using_mapping(ans_path, grading_json, imprinted_pdf_path, extracted_ids)
            print("✅ Imprinting finished. Imprinted PDF at:", imprinted_pdf_path)

        print("π Pipeline finished successfully.")
        return qpms_text, as_text, grading_text, grading_pdf_path, imprinted_pdf_path
    except Exception as e:
        # Top-level boundary: report the failure to the caller as text in the
        # first slot instead of propagating, and keep the traceback in the log.
        print("β Pipeline error:", e)
        import traceback
        traceback.print_exc()
        return f"β Error: {e}", None, None, None, None
# ---------------- GRADIO UI ----------------
# NOTE(review): the original indentation was lost, so the nesting below (which
# components sit inside each gr.Row) is reconstructed — confirm against the
# intended layout.
with gr.Blocks(title="AI Grading (Fixed - google-genai SDK)") as demo:
    gr.Markdown("## π AI Grading β Fixed with google-genai SDK")
    gr.Markdown("**✅ Now using the new official `google-genai` SDK (no more ragStoreName errors!)**")

    # Inputs: the three PDFs side by side, then the imprint option and run button.
    with gr.Row():
        qp_file = gr.File(label="π Upload Question Paper (PDF)")
        ms_file = gr.File(label="π Upload Markscheme (PDF)")
        ans_file = gr.File(label="π Upload Student Answer Sheet (PDF)")
    imprint_toggle = gr.Checkbox(label="β Imprint Marks on Student Answer Sheet", value=False)
    run_button = gr.Button("π Run Pipeline")

    # Outputs: both transcripts side by side, then grading text and downloads.
    with gr.Row():
        qpms_box = gr.Textbox(label="π QP+MS Transcript", lines=12)
        as_box = gr.Textbox(label="π AS Transcript", lines=12)
    grading_output_box = gr.Textbox(label="π§Ύ Grading (Markdown)", lines=20)
    grading_pdf_file = gr.File(label="π₯ Download Grading PDF")
    imprint_pdf_file = gr.File(label="π₯ Download Imprinted PDF (Optional)")

    def run_pipeline(qp_file_obj, ms_file_obj, ans_file_obj, imprint_flag):
        """
        Button callback: validate the uploads and run the grading pipeline.

        Returns five values in the order of the ``outputs`` list wired to the
        button: QP+MS transcript, AS transcript, grading text, grading PDF
        path, imprinted PDF path.
        """
        if not qp_file_obj or not ms_file_obj or not ans_file_obj:
            return "β Please upload all three files", "", "", None, None
        qp_path = qp_file_obj.name
        ms_path = ms_file_obj.name
        ans_path = ans_file_obj.name
        qpms_text, as_text, grading_text, grading_pdf_path, imprinted_pdf_path = align_and_grade_pipeline(
            qp_path, ms_path, ans_path, imprint=imprint_flag
        )
        # The pipeline returns None for the text slots on failure; the
        # textboxes are given empty strings instead.
        return qpms_text or "", as_text or "", grading_text or "", grading_pdf_path, imprinted_pdf_path

    run_button.click(
        fn=run_pipeline,
        inputs=[qp_file, ms_file, ans_file, imprint_toggle],
        outputs=[qpms_box, as_box, grading_output_box, grading_pdf_file, imprint_pdf_file]
    )
# Start the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()