Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -495,6 +495,26 @@ def imprint_marks_using_mapping(pdf_path, grading_json, output_pdf, model, expec
|
|
| 495 |
# ---------------- GRAPH DETECTION HELPERS ----------------
|
| 496 |
# These functions are now robustly handled by the new_code, so they are no longer needed here.
|
| 497 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
# ---------------- PIPELINE UPDATE FOR GRAPH-AWARE GRADING ----------------
|
| 499 |
def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
|
| 500 |
"""
|
|
@@ -533,20 +553,10 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
|
|
| 533 |
# Step 1.i.a: Extract graph-expected questions from MS
|
| 534 |
ms_graph_mapping = extract_graph_questions_from_ms(qpms_text)
|
| 535 |
print("🖼️ Graph-expected questions in MS:", ms_graph_mapping)
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
if
|
| 539 |
-
|
| 540 |
-
ms_pages = convert_from_path(merged_qpms_path, dpi=200)
|
| 541 |
-
for qnum, page_num in ms_graph_mapping.items():
|
| 542 |
-
# Page numbers in PDF are 1-indexed
|
| 543 |
-
if 1 <= page_num <= len(ms_pages):
|
| 544 |
-
img = ms_pages[page_num-1]
|
| 545 |
-
img_path = f"graph_q{qnum}_p{page_num}.png"
|
| 546 |
-
img.save(img_path)
|
| 547 |
-
print(f"✅ Saved graph image for Question {qnum} (Page {page_num}) as {img_path}")
|
| 548 |
-
else:
|
| 549 |
-
print(f"⚠️ Page {page_num} for Question {qnum} is out of range (PDF has {len(ms_pages)} pages)")
|
| 550 |
|
| 551 |
# Step 2: extract serial numbers (question IDs) using regex from qpms_text
|
| 552 |
extracted_ids = extract_question_ids_from_qpms(qpms_text)
|
|
@@ -564,19 +574,14 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
|
|
| 564 |
# Step 2.a: Extract graph-attempted answers from AS
|
| 565 |
as_graph_mapping = extract_graph_answers_from_as(as_text)
|
| 566 |
print("🖼️ Graph-attempted answers in AS:", as_graph_mapping)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 567 |
|
| 568 |
-
# Step
|
| 569 |
-
graph_bundles = []
|
| 570 |
-
for ans_num, as_page in as_graph_mapping.items():
|
| 571 |
-
if ans_num in ms_graph_mapping:
|
| 572 |
-
graph_bundles.append({
|
| 573 |
-
"question": ans_num,
|
| 574 |
-
"ms_page": ms_graph_mapping[ans_num],
|
| 575 |
-
"as_page": as_page
|
| 576 |
-
})
|
| 577 |
-
print("🔗 Graph bundles for grading:", graph_bundles)
|
| 578 |
-
|
| 579 |
-
# Step 4: Grading - send both transcripts to grading model, inject graph bundle info
|
| 580 |
print("2) Preparing grading input and sending to Gemini for grading...")
|
| 581 |
grading_input = (
|
| 582 |
"=== QP+MS TRANSCRIPT BEGIN ===\n"
|
|
@@ -586,16 +591,14 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
|
|
| 586 |
+ as_text
|
| 587 |
+ "\n=== ANSWER SHEET TRANSCRIPT END ===\n"
|
| 588 |
)
|
| 589 |
-
# Inject graph
|
| 590 |
-
if
|
| 591 |
-
graph_note = "\n\n---\
|
| 592 |
-
for bundle in graph_bundles:
|
| 593 |
-
graph_note += f"- Question {bundle['question']}:\n - Markscheme graph (Page {bundle['ms_page']})\n - Student’s graph (Page {bundle['as_page']})\n"
|
| 594 |
-
graph_note += "\nGrade these with visual context. For all other questions, proceed as usual.\n---\n"
|
| 595 |
grading_input += graph_note
|
| 596 |
-
|
| 597 |
grading_prompt_system = PROMPTS["GRADING_PROMPT"]["content"]
|
| 598 |
-
|
|
|
|
|
|
|
| 599 |
print("🧾 Grading output received. Saving debug file: debug_grading.md")
|
| 600 |
with open("debug_grading.md", "w", encoding="utf-8") as f:
|
| 601 |
f.write(grading_text)
|
|
|
|
| 495 |
# ---------------- GRAPH DETECTION HELPERS ----------------
|
| 496 |
# These functions are now robustly handled by the new_code, so they are no longer needed here.
|
| 497 |
|
| 498 |
+
# ---------------- GRAPH PAGE EXTRACTION HELPER ----------------
|
| 499 |
+
def extract_pdf_pages_as_images(pdf_path, page_numbers, prefix):
    """
    Render selected pages of a PDF to PNG files and return the saved paths.

    Args:
        pdf_path: Path of the PDF to read.
        page_numbers: Iterable of 1-based page numbers; duplicates are ignored.
        prefix: Filename prefix; each page is saved as "{prefix}_page_{n}.png".

    Returns:
        List of saved PNG paths in ascending page order. Pages that are below 1
        or for which nothing could be rendered are skipped with a printed
        warning instead of raising, so the list may be shorter than the
        request. An empty ``page_numbers`` yields an empty list.

    Prints to console when extracting each page.
    """
    unique_pages = sorted(set(page_numbers))
    out_paths = []
    for page_num in unique_pages:
        # Page numbers are 1-based, so anything below 1 cannot refer to a real page.
        if page_num < 1:
            print(f"⚠️ Page {page_num} is not a valid 1-based page number for {pdf_path}; skipping")
            continue

        # Render one page at a time: the requested pages may be far apart, and
        # rasterising the whole min..max range at 200 dpi would waste time and
        # memory on pages that are never saved.
        rendered = convert_from_path(pdf_path, dpi=200, first_page=page_num, last_page=page_num)

        # An empty result means nothing was rendered for this page
        # (e.g. the page lies past the end of the PDF).
        if not rendered:
            print(f"⚠️ Page {page_num} is out of range for {pdf_path}; skipping")
            continue

        out_path = f"{prefix}_page_{page_num}.png"
        rendered[0].save(out_path, "PNG")
        print(f"📤 Extracted graph page {page_num} from {pdf_path} as {out_path}")
        out_paths.append(out_path)
    return out_paths
|
| 517 |
+
|
| 518 |
# ---------------- PIPELINE UPDATE FOR GRAPH-AWARE GRADING ----------------
|
| 519 |
def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
|
| 520 |
"""
|
|
|
|
| 553 |
# Step 1.i.a: Extract graph-expected questions from MS
|
| 554 |
ms_graph_mapping = extract_graph_questions_from_ms(qpms_text)
|
| 555 |
print("🖼️ Graph-expected questions in MS:", ms_graph_mapping)
|
| 556 |
+
ms_graph_pages = list(ms_graph_mapping.values())
|
| 557 |
+
ms_graph_images = []
|
| 558 |
+
if ms_graph_pages:
|
| 559 |
+
ms_graph_images = extract_pdf_pages_as_images(merged_qpms_path, ms_graph_pages, prefix="qpms_graph")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 560 |
|
| 561 |
# Step 2: extract serial numbers (question IDs) using regex from qpms_text
|
| 562 |
extracted_ids = extract_question_ids_from_qpms(qpms_text)
|
|
|
|
| 574 |
# Step 2.a: Extract graph-attempted answers from AS
|
| 575 |
as_graph_mapping = extract_graph_answers_from_as(as_text)
|
| 576 |
print("🖼️ Graph-attempted answers in AS:", as_graph_mapping)
|
| 577 |
+
as_graph_pages = list(as_graph_mapping.values())
|
| 578 |
+
as_graph_images = []
|
| 579 |
+
if as_graph_pages:
|
| 580 |
+
as_graph_images = extract_pdf_pages_as_images(ans_path, as_graph_pages, prefix="as_graph")
|
| 581 |
+
|
| 582 |
+
# Step 3: (No graph bundle matching, just collect images)
|
| 583 |
|
| 584 |
+
# Step 4: Grading - send both transcripts to grading model, inject graph image info
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 585 |
print("2) Preparing grading input and sending to Gemini for grading...")
|
| 586 |
grading_input = (
|
| 587 |
"=== QP+MS TRANSCRIPT BEGIN ===\n"
|
|
|
|
| 591 |
+ as_text
|
| 592 |
+ "\n=== ANSWER SHEET TRANSCRIPT END ===\n"
|
| 593 |
)
|
| 594 |
+
# Inject graph image note
|
| 595 |
+
if ms_graph_images or as_graph_images:
|
| 596 |
+
graph_note = "\n\n---\nSome questions require graphs. I’ve attached the relevant graph pages from QP+MS and from the Answer Sheet. Use them as visual context when grading.\n---\n"
|
|
|
|
|
|
|
|
|
|
| 597 |
grading_input += graph_note
|
|
|
|
| 598 |
grading_prompt_system = PROMPTS["GRADING_PROMPT"]["content"]
|
| 599 |
+
# Pass images as additional input to gemini_generate_content
|
| 600 |
+
grading_images = ms_graph_images + as_graph_images
|
| 601 |
+
grading_text = gemini_generate_content(model, grading_prompt_system + "\n\nPlease grade the following transcripts:\n" + grading_input, image_obj=grading_images if grading_images else None)
|
| 602 |
print("🧾 Grading output received. Saving debug file: debug_grading.md")
|
| 603 |
with open("debug_grading.md", "w", encoding="utf-8") as f:
|
| 604 |
f.write(grading_text)
|