atz21 committed on
Commit
0fe037d
·
verified ·
1 Parent(s): 3e408d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -34
app.py CHANGED
@@ -495,6 +495,26 @@ def imprint_marks_using_mapping(pdf_path, grading_json, output_pdf, model, expec
495
  # ---------------- GRAPH DETECTION HELPERS ----------------
496
  # These functions are now robustly handled by the new_code, so they are no longer needed here.
497
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
498
  # ---------------- PIPELINE UPDATE FOR GRAPH-AWARE GRADING ----------------
499
  def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
500
  """
@@ -533,20 +553,10 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
533
  # Step 1.i.a: Extract graph-expected questions from MS
534
  ms_graph_mapping = extract_graph_questions_from_ms(qpms_text)
535
  print("🖼️ Graph-expected questions in MS:", ms_graph_mapping)
536
-
537
- # NEW: Separate out graph-expected pages as images
538
- if ms_graph_mapping:
539
- print("📤 Separating graph-expected pages as images...")
540
- ms_pages = convert_from_path(merged_qpms_path, dpi=200)
541
- for qnum, page_num in ms_graph_mapping.items():
542
- # Page numbers in PDF are 1-indexed
543
- if 1 <= page_num <= len(ms_pages):
544
- img = ms_pages[page_num-1]
545
- img_path = f"graph_q{qnum}_p{page_num}.png"
546
- img.save(img_path)
547
- print(f"✅ Saved graph image for Question {qnum} (Page {page_num}) as {img_path}")
548
- else:
549
- print(f"⚠️ Page {page_num} for Question {qnum} is out of range (PDF has {len(ms_pages)} pages)")
550
 
551
  # Step 2: extract serial numbers (question IDs) using regex from qpms_text
552
  extracted_ids = extract_question_ids_from_qpms(qpms_text)
@@ -564,19 +574,14 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
564
  # Step 2.a: Extract graph-attempted answers from AS
565
  as_graph_mapping = extract_graph_answers_from_as(as_text)
566
  print("🖼️ Graph-attempted answers in AS:", as_graph_mapping)
 
 
 
 
 
 
567
 
568
- # Step 3: Graph Matching
569
- graph_bundles = []
570
- for ans_num, as_page in as_graph_mapping.items():
571
- if ans_num in ms_graph_mapping:
572
- graph_bundles.append({
573
- "question": ans_num,
574
- "ms_page": ms_graph_mapping[ans_num],
575
- "as_page": as_page
576
- })
577
- print("🔗 Graph bundles for grading:", graph_bundles)
578
-
579
- # Step 4: Grading - send both transcripts to grading model, inject graph bundle info
580
  print("2) Preparing grading input and sending to Gemini for grading...")
581
  grading_input = (
582
  "=== QP+MS TRANSCRIPT BEGIN ===\n"
@@ -586,16 +591,14 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
586
  + as_text
587
  + "\n=== ANSWER SHEET TRANSCRIPT END ===\n"
588
  )
589
- # Inject graph bundle note
590
- if graph_bundles:
591
- graph_note = "\n\n---\nFor the following questions, a graph was expected and the student attempted it. Please use the provided images for grading these questions:\n"
592
- for bundle in graph_bundles:
593
- graph_note += f"- Question {bundle['question']}:\n - Markscheme graph (Page {bundle['ms_page']})\n - Student’s graph (Page {bundle['as_page']})\n"
594
- graph_note += "\nGrade these with visual context. For all other questions, proceed as usual.\n---\n"
595
  grading_input += graph_note
596
-
597
  grading_prompt_system = PROMPTS["GRADING_PROMPT"]["content"]
598
- grading_text = gemini_generate_content(model, grading_prompt_system + "\n\nPlease grade the following transcripts:\n" + grading_input)
 
 
599
  print("🧾 Grading output received. Saving debug file: debug_grading.md")
600
  with open("debug_grading.md", "w", encoding="utf-8") as f:
601
  f.write(grading_text)
 
495
  # ---------------- GRAPH DETECTION HELPERS ----------------
496
  # These functions are now robustly handled by the new_code, so they are no longer needed here.
497
 
498
+ # ---------------- GRAPH PAGE EXTRACTION HELPER ----------------
499
def extract_pdf_pages_as_images(pdf_path, page_numbers, prefix):
    """
    Render selected pages of a PDF to PNG files and return the saved paths.

    Args:
        pdf_path: Path of the PDF passed to ``convert_from_path``.
        page_numbers: Iterable of 1-based page numbers; duplicates are ignored.
        prefix: Filename prefix. Each page is written to
            ``"{prefix}_page_{n}.png"`` relative to the working directory.

    Returns:
        List of PNG paths that were written, in ascending page order. Pages
        that cannot be extracted are skipped with a console warning, so the
        list may be shorter than the number of requested pages (and is empty
        when nothing valid was requested).
    """
    requested_pages = sorted(set(page_numbers))

    # Page numbers are 1-based; anything lower cannot exist and would also
    # shift the page -> image index mapping computed below.
    valid_pages = []
    for page_num in requested_pages:
        if page_num < 1:
            print(f"⚠️ Page {page_num} is not a valid 1-based page number for {pdf_path}; skipping")
        else:
            valid_pages.append(page_num)

    # Nothing to render: return before min()/max()-style lookups or the
    # conversion call can fail on an empty selection.
    if not valid_pages:
        return []

    first_page = valid_pages[0]
    last_page = valid_pages[-1]

    # A single conversion call renders the contiguous range first..last; the
    # pages actually wanted are picked out of it by offset afterwards.
    images = convert_from_path(pdf_path, dpi=200, first_page=first_page, last_page=last_page)

    out_paths = []
    for page_num in valid_pages:
        img_idx = page_num - first_page
        # Fewer images than requested can come back when the range runs past
        # the end of the document; skip those pages instead of raising.
        if img_idx >= len(images):
            print(f"⚠️ Page {page_num} is out of range for {pdf_path} (got {len(images)} page image(s) starting at page {first_page}); skipping")
            continue
        out_path = f"{prefix}_page_{page_num}.png"
        images[img_idx].save(out_path, "PNG")
        print(f"📤 Extracted graph page {page_num} from {pdf_path} as {out_path}")
        out_paths.append(out_path)
    return out_paths
517
+
518
  # ---------------- PIPELINE UPDATE FOR GRAPH-AWARE GRADING ----------------
519
  def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
520
  """
 
553
  # Step 1.i.a: Extract graph-expected questions from MS
554
  ms_graph_mapping = extract_graph_questions_from_ms(qpms_text)
555
  print("🖼️ Graph-expected questions in MS:", ms_graph_mapping)
556
+ ms_graph_pages = list(ms_graph_mapping.values())
557
+ ms_graph_images = []
558
+ if ms_graph_pages:
559
+ ms_graph_images = extract_pdf_pages_as_images(merged_qpms_path, ms_graph_pages, prefix="qpms_graph")
 
 
 
 
 
 
 
 
 
 
560
 
561
  # Step 2: extract serial numbers (question IDs) using regex from qpms_text
562
  extracted_ids = extract_question_ids_from_qpms(qpms_text)
 
574
  # Step 2.a: Extract graph-attempted answers from AS
575
  as_graph_mapping = extract_graph_answers_from_as(as_text)
576
  print("🖼️ Graph-attempted answers in AS:", as_graph_mapping)
577
+ as_graph_pages = list(as_graph_mapping.values())
578
+ as_graph_images = []
579
+ if as_graph_pages:
580
+ as_graph_images = extract_pdf_pages_as_images(ans_path, as_graph_pages, prefix="as_graph")
581
+
582
+ # Step 3: (No graph bundle matching, just collect images)
583
 
584
+ # Step 4: Grading - send both transcripts to grading model, inject graph image info
 
 
 
 
 
 
 
 
 
 
 
585
  print("2) Preparing grading input and sending to Gemini for grading...")
586
  grading_input = (
587
  "=== QP+MS TRANSCRIPT BEGIN ===\n"
 
591
  + as_text
592
  + "\n=== ANSWER SHEET TRANSCRIPT END ===\n"
593
  )
594
+ # Inject graph image note
595
+ if ms_graph_images or as_graph_images:
596
+ graph_note = "\n\n---\nSome questions require graphs. I’ve attached the relevant graph pages from QP+MS and from the Answer Sheet. Use them as visual context when grading.\n---\n"
 
 
 
597
  grading_input += graph_note
 
598
  grading_prompt_system = PROMPTS["GRADING_PROMPT"]["content"]
599
+ # Pass images as additional input to gemini_generate_content
600
+ grading_images = ms_graph_images + as_graph_images
601
+ grading_text = gemini_generate_content(model, grading_prompt_system + "\n\nPlease grade the following transcripts:\n" + grading_input, image_obj=grading_images if grading_images else None)
602
  print("🧾 Grading output received. Saving debug file: debug_grading.md")
603
  with open("debug_grading.md", "w", encoding="utf-8") as f:
604
  f.write(grading_text)