atz21 committed on
Commit
a5a195e
·
verified ·
1 Parent(s): 3b94934

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +217 -123
app.py CHANGED
@@ -28,7 +28,8 @@ INPUT: This file is a PDF that first contains the Question Paper and immediately
28
  TASK:
29
  1. Transcribe EXACTLY all the questions FIRST (with their total marks).
30
  2. After ALL questions, transcribe the Markscheme exactly, preserving M/A/R notation in brackets.
31
- 3. Always number the questions sequentially (Question 1, Question 2, Question 3, …) **in the order they appear in the PDF**, even if the PDF shows a different number or leaves it blank. Do NOT skip or leave Question: blank.
 
32
  FORMAT:
33
  ==== PAPER TOTAL MARKS ====
34
  <total marks>
@@ -52,7 +53,7 @@ Answer 2 :
52
  <exact MS for Q2 with notations>
53
  (repeat for all answers)
54
  ==== MARKSCHEME END ====
55
- """
56
  }
57
  ,
58
 
@@ -94,8 +95,9 @@ At the very end, provide a summary table:
94
  Then show total clearly as a final line:
95
  `Total: <obtained_marks>/<max_marks>`
96
  NOTES:
97
- - The assistant will receive two transcripts: (1) QP+MS transcription (questions then markscheme) and (2) AS transcription (student answers). Use the QP+MS transcript as the authoritative source of question wording, total marks, and verbatim markscheme entries (M/A/R mark IDs).
98
  - Match student answers to question IDs and grade according to the provided verbatim markscheme.
 
99
  - Produce full markdown as above. Ensure mark IDs used in the grading are present and consistent with the markscheme.
100
  """
101
  }
@@ -221,32 +223,73 @@ def extract_question_ids_from_qpms(text):
221
  print("⚠️ No question IDs extracted; will send NA placeholder.")
222
  return ids
223
 
224
- def build_as_prompt_with_expected_ids(expected_ids):
225
- """
226
- Construct the AS transcription prompt injecting the expected IDs block.
227
- """
228
- if not expected_ids:
229
- ids_block = "{NA}"
230
- else:
231
- ids_block = "{\n" + "\n".join(expected_ids) + "\n}"
232
- prompt = f"""You are a high-quality handwritten transcription assistant.
233
- INPUT: This PDF contains a student's handwritten answer sheet.
234
- TASK: Transcribe the student's answers exactly (as text). Preserve step order and line breaks. Attempt to assign each answer to a question ID if the student has labelled it (e.g., "1", "1a", "2(b)", "3"). If the student hasn't labelled answers, segment contiguous answer blocks and attempt to infer question IDs from context β€” but mark inferred IDs clearly as "INFERRED: <id>"
235
- Enclose all mathematical expressions in Markdown fenced code blocks (``` triple backticks).
236
- If a diagram/graph is omitted, write [Graph omitted].
237
- Unreadable parts: [illegible].
238
- Unanswered: [No response].
239
- Do NOT recreate diagrams.
240
- Ensure consistency and determinism in formatting so subsequent models can grade directly from this aligned format.
241
- Expected questions (if missing, write NA):
242
- {ids_block}
243
- -----------------------
244
- OUTPUT FORMAT:
245
- Question <id>
246
- AS:
247
- <transcribed answer or placeholder>
248
- """
249
- return prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
 
251
  def extract_marks_from_grading(grading_text):
252
  """
@@ -283,17 +326,19 @@ def ask_gemini_for_mapping_for_page(model, image_path, grading_json, expected_id
283
  if expected_ids:
284
  ids_block = "{\n" + "\n".join(expected_ids) + "\n}"
285
  prompt = f"""
286
- You are an exam marker. Your role is to identify where each question begins on the page.
 
 
287
  The page is divided into a {rows} x {cols} grid. Each cell has a RUNNING NUMBER label (1..{rows*cols}).
288
  For each question in the grading JSON, return the cell NUMBER where the FIRST STEP of that question begins.
289
-
290
- IMPORTANT: Only spot and return cell numbers for the following question IDs (one per line):
291
  {ids_block}
292
  If you see a sub-question (e.g., ii) above a main question (e.g., Q4), infer it belongs to the previous question (e.g., Q3.ii).
293
  - Do not place marks inside another question's answer area.
294
- - Prefer placing the marks in a BLANK cell immediately to the RIGHT of the answer step. If no blank cell is available to the right, then place in a blank cell to the LEFT.
 
295
  - Never place marks above or below the answer.
296
- - If a question starts on a previous page, you may omit it for this page.
297
  Return JSON only, like:
298
  [{{"question": "1.a", "cell_number": 15}}, ...]
299
  Grading JSON:
@@ -301,7 +346,8 @@ Grading JSON:
301
  """
302
  print(f"πŸ“‘ Sending mapping request for image {image_path} to Gemini...")
303
  img = Image.open(image_path)
304
- response = model.generate_content([prompt, img])
 
305
  raw_text = getattr(response, "text", None)
306
  if not raw_text and getattr(response, "candidates", None):
307
  raw_text = response.candidates[0].content.parts[0].text
@@ -446,93 +492,141 @@ def imprint_marks_using_mapping(pdf_path, grading_json, output_pdf, model, expec
446
  print("πŸ“‘ Imprinted PDF saved to:", compressed)
447
  return compressed
448
 
449
- # ---------------- MAIN PIPELINE ----------------
450
- def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
451
- """
452
- Final pipeline implementing requested flow and verbose console logging.
453
- """
454
- try:
455
- print("πŸ” Starting pipeline...")
456
- # Step 0: compress as needed
457
- qp_path = compress_pdf(qp_path)
458
- ms_path = compress_pdf(ms_path)
459
- ans_path = compress_pdf(ans_path)
460
-
461
- # Merge QP + MS
462
- merged_qpms_path = os.path.splitext(qp_path)[0] + "_merged_qp_ms.pdf"
463
- merge_pdfs([qp_path, ms_path], merged_qpms_path)
464
- print("πŸ“Ž Merged QP + MS ->", merged_qpms_path)
465
-
466
- # Upload files to Gemini
467
- print("πŸ”Ό Uploading files to Gemini...")
468
- merged_uploaded = genai.upload_file(path=merged_qpms_path, display_name="QP+MS (merged)")
469
- ans_uploaded = genai.upload_file(path=ans_path, display_name="Answer Sheet")
470
- print("βœ… Upload complete.")
471
-
472
- # Create model and print which selected
473
- model = create_model()
474
-
475
- # Step 1.i: QP+MS transcription (first)
476
- print("1.i) Transcribing QP+MS (questions first, then full markscheme)...")
477
- qpms_prompt = PROMPTS["QP_MS_TRANSCRIPTION"]["content"]
478
- qpms_text = gemini_generate_content(model, qpms_prompt, file_upload_obj=merged_uploaded)
479
- print("πŸ“„ QP+MS transcription received. Saving debug file: debug_qpms_transcript.txt")
480
- with open("debug_qpms_transcript.txt", "w", encoding="utf-8") as f:
481
- f.write(qpms_text)
482
-
483
- # Step 2: extract serial numbers (question IDs) using regex from qpms_text
484
- extracted_ids = extract_question_ids_from_qpms(qpms_text)
485
- if not extracted_ids:
486
- extracted_ids = ["NA"]
487
-
488
- # Step 1.ii: Build AS prompt injecting extracted IDs and transcribe AS
489
- print("1.ii) Building AS transcription prompt with expected question IDs and sending to Gemini...")
490
- as_prompt = build_as_prompt_with_expected_ids(extracted_ids)
491
- as_text = gemini_generate_content(model, as_prompt, file_upload_obj=ans_uploaded)
492
- print("πŸ“ AS transcription received. Saving debug file: debug_as_transcript.txt")
493
- with open("debug_as_transcript.txt", "w", encoding="utf-8") as f:
494
- f.write(as_text)
495
-
496
- # Step 3: Grading - send both transcripts to grading model
497
- print("2) Preparing grading input and sending to Gemini for grading...")
498
- grading_input = (
499
- "=== QP+MS TRANSCRIPT BEGIN ===\n"
500
- + qpms_text
501
- + "\n=== QP+MS TRANSCRIPT END ===\n\n"
502
- + "=== ANSWER SHEET TRANSCRIPT BEGIN ===\n"
503
- + as_text
504
- + "\n=== ANSWER SHEET TRANSCRIPT END ===\n"
505
- )
506
- grading_prompt_system = PROMPTS["GRADING_PROMPT"]["content"]
507
- grading_text = gemini_generate_content(model, grading_prompt_system + "\n\nPlease grade the following transcripts:\n" + grading_input)
508
- print("🧾 Grading output received. Saving debug file: debug_grading.md")
509
- with open("debug_grading.md", "w", encoding="utf-8") as f:
510
- f.write(grading_text)
511
-
512
- # Save grading PDF
513
- base_name = os.path.splitext(os.path.basename(ans_path))[0]
514
- grading_pdf_path = save_as_pdf(grading_text, f"{base_name}_graded.pdf")
515
- print("πŸ“„ Grading PDF saved:", grading_pdf_path)
516
-
517
- # Step 4: Extract marks for imprinting
518
- grading_json = extract_marks_from_grading(grading_text)
519
- with open("debug_grading_json.json", "w", encoding="utf-8") as f:
520
- json.dump(grading_json, f, indent=2, ensure_ascii=False)
521
- print("πŸ”§ Grading marks extraction complete.")
522
-
523
- imprinted_pdf_path = None
524
- if imprint:
525
- print("✍ Imprint option enabled. Starting imprinting process (parallel mapping requests)...")
526
- imprinted_pdf_path = f"{base_name}_imprinted.pdf"
527
- imprinted_pdf_path = imprint_marks_using_mapping(ans_path, grading_json, imprinted_pdf_path, model, extracted_ids)
528
- print("βœ… Imprinting finished. Imprinted PDF at:", imprinted_pdf_path)
529
-
530
- print("🏁 Pipeline finished successfully.")
531
- return qpms_text, as_text, grading_text, grading_pdf_path, imprinted_pdf_path
532
-
533
- except Exception as e:
534
- print("❌ Pipeline error:", e)
535
- return f"❌ Error: {e}", None, None, None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
536
 
537
  # ---------------- GRADIO UI ----------------
538
  with gr.Blocks(title="LeadIB AI Grading (Final Flow β€” Verbose)") as demo:
@@ -572,4 +666,4 @@ with gr.Blocks(title="LeadIB AI Grading (Final Flow β€” Verbose)") as demo:
572
  )
573
 
574
  if __name__ == "__main__":
575
- demo.launch()
 
28
  TASK:
29
  1. Transcribe EXACTLY all the questions FIRST (with their total marks).
30
  2. After ALL questions, transcribe the Markscheme exactly, preserving M/A/R notation in brackets.
31
+ 3. Always number the questions sequentially (Question 1, Question 2, Question 3, …) **in the order they appear in the PDF**, even if the PDF shows a different number or leaves it blank. Do NOT skip or leave Question: blank. Never start a question other than question 1 ( even if it is labelled in pdf as 8 name it 1)
32
+ 4. After the markscheme, DETECT and FLAG all questions in the markscheme where a graph/diagram is expected. For each, output the question number and the page number in the format below.
33
  FORMAT:
34
  ==== PAPER TOTAL MARKS ====
35
  <total marks>
 
53
  <exact MS for Q2 with notations>
54
  (repeat for all answers)
55
  ==== MARKSCHEME END ====
56
+ ==== GRAPH EXPECTED QUESTIONS ====\nGraph expected in:\n- Question <number> β†’ Page <number>\n(one per line)\n==== END GRAPH EXPECTED ====\n"""
57
  }
58
  ,
59
 
 
95
  Then show total clearly as a final line:
96
  `Total: <obtained_marks>/<max_marks>`
97
  NOTES:
98
+ - The assistant will receive two transcripts: (1) QP+MS transcript (questions then markscheme) and (2) AS transcript (student answers). Use the QP+MS transcript as the authoritative source of question wording, total marks, and verbatim markscheme entries (M/A/R mark IDs).
99
  - Match student answers to question IDs and grade according to the provided verbatim markscheme.
100
+ - For questions where a graph is expected and the student attempted a graph, you will be provided with the relevant markscheme and answer sheet graph images/pages. Use these for grading those questions with visual context. For all other questions, proceed as usual.
101
  - Produce full markdown as above. Ensure mark IDs used in the grading are present and consistent with the markscheme.
102
  """
103
  }
 
223
  print("⚠️ No question IDs extracted; will send NA placeholder.")
224
  return ids
225
 
226
+ # Update AS prompt builder to include graph detection
227
+
228
def build_as_prompt_with_expected_ids(expected_ids, qpms_text=None):
    """
    Build the answer-sheet (AS) transcription prompt.

    Args:
        expected_ids: Question IDs (strings) expected in the answer sheet.
            An empty/None value produces the "{NA}" placeholder block.
        qpms_text: Optional QP+MS transcript. When given, it is embedded in
            the prompt (between BEGIN/END delimiters) together with the
            instruction to consult it for ambiguous handwriting.

    Returns:
        The prompt string, ending with the "GRAPH FOUND ANSWERS" output block
        that extract_graph_answers_from_as parses.
    """
    if expected_ids:
        ids_block = "{\n" + "\n".join(expected_ids) + "\n}"
    else:
        ids_block = "{NA}"

    refer_text = ""
    if qpms_text:
        # The instruction below claims the transcript is provided, so it must
        # actually be part of the prompt; previously it was never included.
        refer_text = (
            "\nYou are also provided with the full transcript of the Question Paper and Markscheme (QP+MS). "
            "If you encounter ambiguous handwriting (for example, if a number could be '-1.6' or '1.6'), refer to the QP+MS transcript to infer the student's intended answer. "
            "However, if you are confident in your transcription, you may use your own judgment. "
            "Always prioritize accuracy and context from the QP+MS transcript when in doubt.\n"
            "=== QP+MS TRANSCRIPT BEGIN ===\n"
            + qpms_text
            + "\n=== QP+MS TRANSCRIPT END ===\n"
        )

    # Assembled line by line so the prompt text does not depend on how this
    # function body is indented.
    prompt_lines = [
        "You are a high-quality handwritten transcription assistant.",
        "INPUT: This PDF contains a student's handwritten answer sheet." + refer_text,
        'TASK: Transcribe the student\'s answers exactly (as text). Preserve step order and line breaks. Attempt to assign each answer to a question ID if the student has labelled it (e.g., "1", "1a", "2(b)", "3"). If the student hasn\'t labelled answers, segment contiguous answer blocks and attempt to infer question IDs from context — but mark inferred IDs clearly as "INFERRED: <id>"',
        "Enclose all mathematical expressions in Markdown fenced code blocks (``` triple backticks).",
        "If a diagram/graph is omitted, write [Graph omitted].",
        "Unreadable parts: [illegible].",
        "Unanswered: [No response].",
        "Do NOT recreate diagrams.",
        "Ensure consistency and determinism in formatting so subsequent models can grade directly from this aligned format.",
        "Expected questions (if missing, write NA):",
        ids_block,
        "-----------------------",
        "OUTPUT FORMAT:",
        "Question <id>",
        "AS:",
        "<transcribed answer or placeholder>",
        "==== GRAPH FOUND ANSWERS ====",
        "Graph found in:",
        "- Answer <number> → Page <number>",
        "(one per line)",
        "==== END GRAPH FOUND ====",
    ]
    return "\n".join(prompt_lines)
263
+
264
+ # Robust parsing functions for graph detection
265
+
266
def extract_graph_questions_from_ms(ms_text):
    """
    Parse the QP+MS transcription for questions where a graph is expected.

    Looks for the "Graph expected in:" list (normally wrapped in the
    "==== GRAPH EXPECTED QUESTIONS ====" / "==== END GRAPH EXPECTED ===="
    markers, but the markers are not required) and reads entries such as
    "- Question 3 → Page 7". The separator may be "→", "->", "=>", "-" or ":".

    Args:
        ms_text: Raw LLM transcription text; may be empty or None.

    Returns:
        dict mapping question number (int) to page number (int); empty when
        no list or no well-formed entry is found.
    """
    if not ms_text:
        return {}

    # The list runs from its header up to the end marker, or to the end of
    # the text when the model left the end marker out.
    section = re.search(
        r"Graph expected in:(.*?)(?:==== END GRAPH EXPECTED ====|\Z)",
        ms_text,
        re.DOTALL | re.IGNORECASE,
    )
    if section is None:
        return {}

    entry_pattern = re.compile(
        r"[-*\u2022]?\s*Question\s*(\d+)\s*(?:\u2192|[-=]>|[-:])\s*Page\s*(\d+)",
        re.IGNORECASE,
    )
    mapping = {}
    for line in section.group(1).splitlines():
        entry = entry_pattern.match(line.strip())
        if entry:
            mapping[int(entry.group(1))] = int(entry.group(2))
    return mapping
279
+
280
def extract_graph_answers_from_as(as_text):
    """
    Parse the answer-sheet transcription for answers that contain a graph.

    Looks for the "Graph found in:" list (normally wrapped in the
    "==== GRAPH FOUND ANSWERS ====" / "==== END GRAPH FOUND ====" markers,
    but the markers are not required) and reads entries such as
    "- Answer 3 → Page 2". The separator may be "→", "->", "=>", "-" or ":".

    Args:
        as_text: Raw LLM transcription text; may be empty or None.

    Returns:
        dict mapping answer number (int) to page number (int); empty when
        no list or no well-formed entry is found.
    """
    if not as_text:
        return {}

    # The list runs from its header up to the end marker, or to the end of
    # the text when the model left the end marker out.
    section = re.search(
        r"Graph found in:(.*?)(?:==== END GRAPH FOUND ====|\Z)",
        as_text,
        re.DOTALL | re.IGNORECASE,
    )
    if section is None:
        return {}

    entry_pattern = re.compile(
        r"[-*\u2022]?\s*Answer\s*(\d+)\s*(?:\u2192|[-=]>|[-:])\s*Page\s*(\d+)",
        re.IGNORECASE,
    )
    mapping = {}
    for line in section.group(1).splitlines():
        entry = entry_pattern.match(line.strip())
        if entry:
            mapping[int(entry.group(1))] = int(entry.group(2))
    return mapping
293
 
294
  def extract_marks_from_grading(grading_text):
295
  """
 
326
  if expected_ids:
327
  ids_block = "{\n" + "\n".join(expected_ids) + "\n}"
328
  prompt = f"""
329
+ You are an exam marker. Your task is to locate a blank cell adjacent to the answer step and place the marks there:
330
+ Primary preference: Use the blank cell immediately to the right of the answer step.
331
+ Fallback: If no blank cell is available on the right, use the blank cell immediately to the left..
332
  The page is divided into a {rows} x {cols} grid. Each cell has a RUNNING NUMBER label (1..{rows*cols}).
333
  For each question in the grading JSON, return the cell NUMBER where the FIRST STEP of that question begins.
334
+ IMPORTANT: For your help i have provided u questions that u can expect in the image :
 
335
  {ids_block}
336
  If you see a sub-question (e.g., ii) above a main question (e.g., Q4), infer it belongs to the previous question (e.g., Q3.ii).
337
  - Do not place marks inside another question's answer area.
338
+ - Each question should have unique cell number
339
+ - If a question serial number is visible in the answer image, you must mandatorily identify the corresponding question using the grading JSON.
340
  - Never place marks above or below the answer.
341
+ - Only if there is no serial number u may omit to select cell number for mark placement
342
  Return JSON only, like:
343
  [{{"question": "1.a", "cell_number": 15}}, ...]
344
  Grading JSON:
 
346
  """
347
  print(f"πŸ“‘ Sending mapping request for image {image_path} to Gemini...")
348
  img = Image.open(image_path)
349
+ response = model.generate_content([prompt, img])
350
+ print("πŸ’¬ Gemini response:", response)
351
  raw_text = getattr(response, "text", None)
352
  if not raw_text and getattr(response, "candidates", None):
353
  raw_text = response.candidates[0].content.parts[0].text
 
492
  print("πŸ“‘ Imprinted PDF saved to:", compressed)
493
  return compressed
494
 
495
+ # ---------------- GRAPH DETECTION HELPERS ----------------
496
+ # The graph-detection parsers (extract_graph_questions_from_ms, extract_graph_answers_from_as) are defined earlier in this file, next to the AS prompt builder.
497
+
498
+ # ---------------- GRAPH PAGE EXTRACTION HELPER ----------------
499
def extract_pdf_pages_as_images(pdf_path, page_numbers, prefix):
    """
    Render selected PDF pages to PNG files.

    Args:
        pdf_path: Path of the PDF to read.
        page_numbers: Iterable of 1-based page numbers; duplicates are
            collapsed and values below 1 are ignored.
        prefix: Output files are named "<prefix>_page_<n>.png".

    Returns:
        List of written PNG paths in ascending page order. Pages that cannot
        be rendered (for example, a number past the end of the document) are
        skipped with a console warning. An empty selection returns [].
    """
    unique_pages = sorted({p for p in page_numbers if p >= 1})
    if not unique_pages:
        return []

    out_paths = []
    for page_num in unique_pages:
        # Render one page at a time: only requested pages are rasterised,
        # so a sparse selection does not load every page in between.
        rendered = convert_from_path(
            pdf_path, dpi=200, first_page=page_num, last_page=page_num
        )
        if not rendered:
            print(f"⚠️ Page {page_num} not found in {pdf_path}; skipping.")
            continue
        out_path = f"{prefix}_page_{page_num}.png"
        rendered[0].save(out_path, "PNG")
        print(f"📤 Extracted graph page {page_num} from {pdf_path} as {out_path}")
        out_paths.append(out_path)
    return out_paths
517
+
518
+ # ---------------- PIPELINE UPDATE FOR GRAPH-AWARE GRADING ----------------
519
def _collect_graph_page_images(pdf_path, page_mapping, prefix):
    """Best-effort render of the pages named in page_mapping; returns [] on failure."""
    pages = list(page_mapping.values())
    if not pages:
        return []
    try:
        return extract_pdf_pages_as_images(pdf_path, pages, prefix=prefix)
    except Exception as exc:
        # Page numbers come from model output and may be wrong; graph images
        # are optional context, so a failure here must not abort grading.
        print(f"⚠️ Could not extract graph pages {pages} from {pdf_path}: {exc}")
        return []


def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
    """
    Run the full flow: transcribe QP+MS, transcribe the answer sheet, grade,
    and optionally imprint marks on the answer sheet. Logs each step to the
    console and writes debug_* files to the working directory.

    Pages flagged as containing graphs (in the markscheme and in the answer
    sheet) are rendered to images and passed to the grading request.

    Args:
        qp_path: Path to the question paper PDF.
        ms_path: Path to the markscheme PDF.
        ans_path: Path to the student's answer sheet PDF.
        imprint: When True, also produce an imprinted copy of the answer sheet.

    Returns:
        Tuple (qpms_text, as_text, grading_text, grading_pdf_path,
        imprinted_pdf_path). imprinted_pdf_path is None unless imprint is
        True. On any error the tuple is ("❌ Error: <message>", None, None,
        None, None).
    """
    try:
        print("🔁 Starting pipeline...")
        # Step 0: compress as needed
        qp_path = compress_pdf(qp_path)
        ms_path = compress_pdf(ms_path)
        ans_path = compress_pdf(ans_path)

        # Merge QP + MS
        merged_qpms_path = os.path.splitext(qp_path)[0] + "_merged_qp_ms.pdf"
        merge_pdfs([qp_path, ms_path], merged_qpms_path)
        print("📎 Merged QP + MS ->", merged_qpms_path)

        # Upload files to Gemini
        print("🔼 Uploading files to Gemini...")
        merged_uploaded = genai.upload_file(path=merged_qpms_path, display_name="QP+MS (merged)")
        ans_uploaded = genai.upload_file(path=ans_path, display_name="Answer Sheet")
        print("✅ Upload complete.")

        # Create model and print which selected
        model = create_model()

        # Step 1.i: QP+MS transcription (first)
        print("1.i) Transcribing QP+MS (questions first, then full markscheme, with graph detection)...")
        # The reminder repeats the delimited block verbatim: the parser keys
        # on these markers, so the instruction must not show a bare format.
        qpms_prompt = (
            PROMPTS["QP_MS_TRANSCRIPTION"]["content"]
            + "\nAt the end, after ==== MARKSCHEME END ====, list all questions in the markscheme where a graph is expected, using exactly this block:\n"
            + "==== GRAPH EXPECTED QUESTIONS ====\nGraph expected in:\n- Question <number> → Page <number>\n(one per line)\n==== END GRAPH EXPECTED ===="
        )
        qpms_text = gemini_generate_content(model, qpms_prompt, file_upload_obj=merged_uploaded)
        print("📄 QP+MS transcription received. Saving debug file: debug_qpms_transcript.txt")
        with open("debug_qpms_transcript.txt", "w", encoding="utf-8") as f:
            f.write(qpms_text)

        # Step 1.i.a: Extract graph-expected questions from MS
        ms_graph_mapping = extract_graph_questions_from_ms(qpms_text)
        print("🖼️ Graph-expected questions in MS:", ms_graph_mapping)
        ms_graph_images = _collect_graph_page_images(merged_qpms_path, ms_graph_mapping, "qpms_graph")

        # Step 1.i.b: extract serial numbers (question IDs) using regex from qpms_text
        extracted_ids = extract_question_ids_from_qpms(qpms_text)
        if not extracted_ids:
            extracted_ids = ["NA"]

        # Step 1.ii: Build AS prompt injecting extracted IDs and transcribe AS
        print("1.ii) Building AS transcription prompt with expected question IDs and graph detection, sending to Gemini...")
        as_prompt = (
            build_as_prompt_with_expected_ids(extracted_ids, qpms_text)
            + "\nAt the end, after all answers, list all answers where a graph is found, using exactly this block:\n"
            + "==== GRAPH FOUND ANSWERS ====\nGraph found in:\n- Answer <number> → Page <number>\n(one per line)\n==== END GRAPH FOUND ===="
        )
        as_text = gemini_generate_content(model, as_prompt, file_upload_obj=ans_uploaded)
        print("📝 AS transcription received. Saving debug file: debug_as_transcript.txt")
        with open("debug_as_transcript.txt", "w", encoding="utf-8") as f:
            f.write(as_text)

        # Step 1.ii.a: Extract graph-attempted answers from AS
        as_graph_mapping = extract_graph_answers_from_as(as_text)
        print("🖼️ Graph-attempted answers in AS:", as_graph_mapping)
        as_graph_images = _collect_graph_page_images(ans_path, as_graph_mapping, "as_graph")

        # Step 2: Grading - send both transcripts (plus any graph page images)
        print("2) Preparing grading input and sending to Gemini for grading...")
        grading_input = (
            "=== QP+MS TRANSCRIPT BEGIN ===\n"
            + qpms_text
            + "\n=== QP+MS TRANSCRIPT END ===\n\n"
            + "=== ANSWER SHEET TRANSCRIPT BEGIN ===\n"
            + as_text
            + "\n=== ANSWER SHEET TRANSCRIPT END ===\n"
        )
        grading_images = ms_graph_images + as_graph_images
        if grading_images:
            # Tell the grader that images accompany the transcripts.
            grading_input += "\n\n---\nSome questions require graphs. I’ve attached the relevant graph pages from QP+MS and from the Answer Sheet. Use them as visual context when grading.\n---\n"
        grading_prompt_system = PROMPTS["GRADING_PROMPT"]["content"]
        grading_text = gemini_generate_content(
            model,
            grading_prompt_system + "\n\nPlease grade the following transcripts:\n" + grading_input,
            image_obj=grading_images or None,
        )
        print("🧾 Grading output received. Saving debug file: debug_grading.md")
        with open("debug_grading.md", "w", encoding="utf-8") as f:
            f.write(grading_text)

        # Step 3: Save grading PDF
        base_name = os.path.splitext(os.path.basename(ans_path))[0]
        grading_pdf_path = save_as_pdf(grading_text, f"{base_name}_graded.pdf")
        print("📄 Grading PDF saved:", grading_pdf_path)

        # Step 4: Extract marks for imprinting
        grading_json = extract_marks_from_grading(grading_text)
        with open("debug_grading_json.json", "w", encoding="utf-8") as f:
            json.dump(grading_json, f, indent=2, ensure_ascii=False)
        print("🔧 Grading marks extraction complete.")

        # Step 5: Optional imprinting of marks onto the answer sheet
        imprinted_pdf_path = None
        if imprint:
            print("✍ Imprint option enabled. Starting imprinting process (parallel mapping requests)...")
            imprinted_pdf_path = f"{base_name}_imprinted.pdf"
            imprinted_pdf_path = imprint_marks_using_mapping(ans_path, grading_json, imprinted_pdf_path, model, extracted_ids)
            print("✅ Imprinting finished. Imprinted PDF at:", imprinted_pdf_path)

        print("🏁 Pipeline finished successfully.")
        return qpms_text, as_text, grading_text, grading_pdf_path, imprinted_pdf_path

    except Exception as e:
        # Top-level boundary for the UI: report the error in the first output
        # slot, and print the traceback so the console shows where it failed.
        import traceback

        print("❌ Pipeline error:", e)
        traceback.print_exc()
        return f"❌ Error: {e}", None, None, None, None
630
 
631
  # ---------------- GRADIO UI ----------------
632
  with gr.Blocks(title="LeadIB AI Grading (Final Flow β€” Verbose)") as demo:
 
666
  )
667
 
668
  if __name__ == "__main__":
669
+ demo.launch()