neurolearn

Sleeping

App Files Files Community

atz21 committed on Sep 28, 2025

Commit

0fe037d

verified ·

1 Parent(s): 3e408d2

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -34

app.py CHANGED Viewed

@@ -495,6 +495,26 @@ def imprint_marks_using_mapping(pdf_path, grading_json, output_pdf, model, expec
 # ---------------- GRAPH DETECTION HELPERS ----------------
 # These functions are now robustly handled by the new_code, so they are no longer needed here.
 # ---------------- PIPELINE UPDATE FOR GRAPH-AWARE GRADING ----------------
 def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
     """
@@ -533,20 +553,10 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
         # Step 1.i.a: Extract graph-expected questions from MS
         ms_graph_mapping = extract_graph_questions_from_ms(qpms_text)
         print("🖼️ Graph-expected questions in MS:", ms_graph_mapping)
-        # NEW: Separate out graph-expected pages as images
-        if ms_graph_mapping:
-            print("📤 Separating graph-expected pages as images...")
-            ms_pages = convert_from_path(merged_qpms_path, dpi=200)
-            for qnum, page_num in ms_graph_mapping.items():
-                # Page numbers in PDF are 1-indexed
-                if 1 <= page_num <= len(ms_pages):
-                    img = ms_pages[page_num-1]
-                    img_path = f"graph_q{qnum}_p{page_num}.png"
-                    img.save(img_path)
-                    print(f"✅ Saved graph image for Question {qnum} (Page {page_num}) as {img_path}")
-                else:
-                    print(f"⚠️ Page {page_num} for Question {qnum} is out of range (PDF has {len(ms_pages)} pages)")
         # Step 2: extract serial numbers (question IDs) using regex from qpms_text
         extracted_ids = extract_question_ids_from_qpms(qpms_text)
@@ -564,19 +574,14 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
         # Step 2.a: Extract graph-attempted answers from AS
         as_graph_mapping = extract_graph_answers_from_as(as_text)
         print("🖼️ Graph-attempted answers in AS:", as_graph_mapping)
-        # Step 3: Graph Matching
-        graph_bundles = []
-        for ans_num, as_page in as_graph_mapping.items():
-            if ans_num in ms_graph_mapping:
-                graph_bundles.append({
-                    "question": ans_num,
-                    "ms_page": ms_graph_mapping[ans_num],
-                    "as_page": as_page
-                })
-        print("🔗 Graph bundles for grading:", graph_bundles)
-        # Step 4: Grading - send both transcripts to grading model, inject graph bundle info
         print("2) Preparing grading input and sending to Gemini for grading...")
         grading_input = (
             "=== QP+MS TRANSCRIPT BEGIN ===\n"
@@ -586,16 +591,14 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
             + as_text
             + "\n=== ANSWER SHEET TRANSCRIPT END ===\n"
         )
-        # Inject graph bundle note
-        if graph_bundles:
-            graph_note = "\n\n---\nFor the following questions, a graph was expected and the student attempted it. Please use the provided images for grading these questions:\n"
-            for bundle in graph_bundles:
-                graph_note += f"- Question {bundle['question']}:\n  - Markscheme graph (Page {bundle['ms_page']})\n  - Student’s graph (Page {bundle['as_page']})\n"
-            graph_note += "\nGrade these with visual context. For all other questions, proceed as usual.\n---\n"
             grading_input += graph_note
         grading_prompt_system = PROMPTS["GRADING_PROMPT"]["content"]
-        grading_text = gemini_generate_content(model, grading_prompt_system + "\n\nPlease grade the following transcripts:\n" + grading_input)
         print("🧾 Grading output received. Saving debug file: debug_grading.md")
         with open("debug_grading.md", "w", encoding="utf-8") as f:
             f.write(grading_text)

 # ---------------- GRAPH DETECTION HELPERS ----------------
 # These functions are now robustly handled by the new_code, so they are no longer needed here.
+# ---------------- GRAPH PAGE EXTRACTION HELPER ----------------
def extract_pdf_pages_as_images(pdf_path, page_numbers, prefix):
    """
    Save selected pages of a PDF as PNG images and return their file paths.

    Args:
        pdf_path: Path of the PDF to render.
        page_numbers: Iterable of 1-based page numbers; duplicates are ignored.
        prefix: Filename prefix; each page is written to
            "<prefix>_page_<page_num>.png" in the current working directory.

    Returns:
        List of written PNG paths, in ascending page order. Empty when no
        valid page was requested.

    Pages that are not positive, or that fall outside what the renderer
    returned, are skipped with a console warning instead of raising.
    Prints to console when extracting each page.
    """
    unique_pages = sorted(set(page_numbers))

    # Page numbers are 1-based; anything below 1 cannot be mapped to an image.
    valid_pages = [page_num for page_num in unique_pages if page_num >= 1]
    for page_num in unique_pages:
        if page_num < 1:
            print(f"⚠️ Skipping invalid graph page {page_num} for {pdf_path} (pages are 1-based)")

    # Nothing to render: return early so min()/max() are never called on an
    # empty sequence and the PDF is not opened needlessly.
    if not valid_pages:
        return []

    first_page = valid_pages[0]
    last_page = valid_pages[-1]
    # The whole first..last span is rendered in a single call; pages that sit
    # in gaps between requested pages are rendered too, just not saved.
    images = convert_from_path(pdf_path, dpi=200, first_page=first_page, last_page=last_page)

    out_paths = []
    for page_num in valid_pages:
        # Images come back in page order starting at first_page.
        img_idx = page_num - first_page
        if img_idx >= len(images):
            # Requested page lies beyond what was rendered (e.g. past the
            # end of the document).
            print(f"⚠️ Page {page_num} is out of range for {pdf_path} ({len(images)} page(s) rendered from page {first_page})")
            continue
        out_path = f"{prefix}_page_{page_num}.png"
        images[img_idx].save(out_path, "PNG")
        print(f"📤 Extracted graph page {page_num} from {pdf_path} as {out_path}")
        out_paths.append(out_path)
    return out_paths
 # ---------------- PIPELINE UPDATE FOR GRAPH-AWARE GRADING ----------------
 def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
     """
         # Step 1.i.a: Extract graph-expected questions from MS
         ms_graph_mapping = extract_graph_questions_from_ms(qpms_text)
         print("🖼️ Graph-expected questions in MS:", ms_graph_mapping)
+        ms_graph_pages = list(ms_graph_mapping.values())
+        ms_graph_images = []
+        if ms_graph_pages:
+            ms_graph_images = extract_pdf_pages_as_images(merged_qpms_path, ms_graph_pages, prefix="qpms_graph")
         # Step 2: extract serial numbers (question IDs) using regex from qpms_text
         extracted_ids = extract_question_ids_from_qpms(qpms_text)
         # Step 2.a: Extract graph-attempted answers from AS
         as_graph_mapping = extract_graph_answers_from_as(as_text)
         print("🖼️ Graph-attempted answers in AS:", as_graph_mapping)
+        as_graph_pages = list(as_graph_mapping.values())
+        as_graph_images = []
+        if as_graph_pages:
+            as_graph_images = extract_pdf_pages_as_images(ans_path, as_graph_pages, prefix="as_graph")
+        # Step 3: (No graph bundle matching, just collect images)
+        # Step 4: Grading - send both transcripts to grading model, inject graph image info
         print("2) Preparing grading input and sending to Gemini for grading...")
         grading_input = (
             "=== QP+MS TRANSCRIPT BEGIN ===\n"
             + as_text
             + "\n=== ANSWER SHEET TRANSCRIPT END ===\n"
         )
+        # Inject graph image note
+        if ms_graph_images or as_graph_images:
+            graph_note = "\n\n---\nSome questions require graphs. I’ve attached the relevant graph pages from QP+MS and from the Answer Sheet. Use them as visual context when grading.\n---\n"
             grading_input += graph_note
         grading_prompt_system = PROMPTS["GRADING_PROMPT"]["content"]
+        # Pass images as additional input to gemini_generate_content
+        grading_images = ms_graph_images + as_graph_images
+        grading_text = gemini_generate_content(model, grading_prompt_system + "\n\nPlease grade the following transcripts:\n" + grading_input, image_obj=grading_images if grading_images else None)
         print("🧾 Grading output received. Saving debug file: debug_grading.md")
         with open("debug_grading.md", "w", encoding="utf-8") as f:
             f.write(grading_text)