atz21 committed on
Commit
4e28843
·
verified ·
1 Parent(s): 45f3df3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +224 -145
app.py CHANGED
@@ -2,21 +2,20 @@ import os
2
  import re
3
  import json
4
  import subprocess
5
- import tempfile
6
  import time
7
  import img2pdf
8
  import gradio as gr
9
- import google.generativeai as genai
10
- from markdown_pdf import MarkdownPdf, Section
11
  from pdf2image import convert_from_path
12
  from PIL import Image, ImageDraw, ImageFont
13
  import cv2
14
  import numpy as np
15
- from concurrent.futures import ThreadPoolExecutor, as_completed
16
  from PyPDF2 import PdfReader, PdfWriter
17
 
18
  # ---------------- CONFIG ----------------
19
- genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 
20
  GRID_ROWS, GRID_COLS = 20, 14
21
 
22
  # ---------------- PROMPTS ----------------
@@ -28,36 +27,57 @@ INPUT: This file is a PDF that first contains the Question Paper and immediately
28
  TASK:
29
  1. Transcribe EXACTLY all the questions FIRST (with their total marks).
30
  2. After ALL questions, transcribe the Markscheme exactly, preserving M/A/R notation in brackets.
31
- 3. Always number the questions sequentially (Question 1, Question 2, Question 3, …) **in the order they appear in the PDF**, even if the PDF shows a different number or leaves it blank. Do NOT skip or leave Question: blank. Never start a question other than question 1 ( even if it is labelled in pdf as 8 name it 1)
32
- 4. After the markscheme, DETECT and FLAG all questions in the markscheme where a graph/diagram is expected. For each, output the question number and the page number in the format below.
 
 
33
  FORMAT:
34
  ==== PAPER TOTAL MARKS ====
35
  <total marks>
 
36
  ==== QUESTIONS BEGIN ====
37
- Question 1.i
38
  Total Marks: <number>
39
  QP: <question text>
40
  --QUESTION-END--
41
- Question 1.ii
 
 
 
 
 
 
42
  Total Marks: <number>
43
  QP: <question text>
44
  --QUESTION-END--
 
45
  (repeat for all questions in order of appearance)
 
46
  ==== QUESTIONS END ====
 
47
  ==== MARKSCHEME BEGIN ====
48
- Answer 1.i:
49
- <exact MS for Q1.i with notations M1, A1, R1 etc>
50
- Answer 1.ii:
51
- <exact MS for Q1.ii with notations>
 
 
52
  Answer 2 :
53
  <exact MS for Q2 with notations>
 
54
  (repeat for all answers)
 
55
  ==== MARKSCHEME END ====
56
- ==== GRAPH EXPECTED QUESTIONS ====\nGraph expected in:\n- Question <number> β†’ Page <number>\n(one per line)\n==== END GRAPH EXPECTED ====\n"""
57
- }
 
 
 
 
 
 
58
  ,
59
 
60
- # GRADING_PROMPT unchanged except we will print steps around calling it
61
  "GRADING_PROMPT": {
62
  "role": "system",
63
  "content": """Developer: You are an official examiner. Apply the following grading rules precisely.
@@ -76,39 +96,101 @@ Answer 2 :
76
  4. Accept valid equivalent forms unless otherwise specified.
77
  5. Apply FT where appropriate.
78
  6. Use proper notation: M1A0, A1, etc.
79
- 7. Any lost mark: use red `<span style=\"color:red\">M0</span>` and make Reason red.
80
  ---
81
  ## Output Format
82
  Produce two sections per question/sub-question, following this structure:
83
  ## Question <id>
84
  ### Markscheme vs Student Answer
85
- | Mark ID | Markscheme Expectation | Student’s Response | Awarded |
86
  |---------|------------------------|--------------------|---------|
87
  | M1_1 | Recognise GP | "r=0.9" | M1 |
88
- ➑️ **Total: X/Y**
89
  ---
90
- ### Examiner’s Report
91
  At the very end, provide a summary table:
92
  | Question Number | Marks | Remark |
93
  |-----------------|-------|--------|
94
- | 1 | X/Y | <remark> |
 
95
  Then show total clearly as a final line:
96
  `Total: <obtained_marks>/<max_marks>`
97
  NOTES:
98
  - The assistant will receive two transcripts: (1) QP+MS transcript (questions then markscheme) and (2) AS transcript (student answers). Use the QP+MS transcript as the authoritative source of question wording, total marks, and verbatim markscheme entries (M/A/R mark IDs).
99
  - Match student answers to question IDs and grade according to the provided verbatim markscheme.
100
  - For questions where a graph is expected and the student attempted a graph, you will be provided with the relevant markscheme and answer sheet graph images/pages. Use these for grading those questions with visual context. For all other questions, proceed as usual.
101
- - Produce full markdown as above. Ensure mark IDs used in the grading are present and consistent with the markscheme.
 
102
  """
103
  }
104
  }
105
 
106
  # ---------------- HELPERS ----------------
107
def save_as_pdf(text, filename="output.pdf"):
    """
    Render markdown ``text`` into a single-section PDF.

    The text is wrapped in one ``Section`` (without a table of contents),
    added to a fresh ``MarkdownPdf`` document and written to ``filename``.

    Returns:
        The ``filename`` that was written.
    """
    document = MarkdownPdf()
    body_section = Section(text, toc=False)
    document.add_section(body_section)
    document.save(filename)
    return filename
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
  def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
114
  if output_path is None:
@@ -145,25 +227,28 @@ def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
145
  print("❌ Compression error:", e)
146
  return input_path
147
 
148
def create_model():
    """
    Build the Gemini model handle and log which model was selected.

    ``gemini-2.5-pro`` is tried first. If constructing it raises, the error
    is logged and ``gemini-2.5-flash`` is tried instead. Both are created
    with temperature 0. If the fallback also fails, the error is logged and
    re-raised to the caller.
    """
    def _build(model_id):
        # Same generation settings for every candidate model.
        return genai.GenerativeModel(model_id, generation_config={"temperature": 0})

    try:
        print("⚑ Attempting to use gemini-2.5-pro model")
        selected = _build("gemini-2.5-pro")
        print("βœ… Selected model: gemini-2.5-pro")
        return selected
    except Exception as primary_error:
        print("⚠️ Could not use gemini-2.5-pro:", primary_error)
        try:
            print("⚑ Falling back to gemini-2.5-flash model")
            selected = _build("gemini-2.5-flash")
            print("βœ… Selected model: gemini-2.5-flash")
            return selected
        except Exception as fallback_error:
            print("❌ Failed to create any Gemini model:", fallback_error)
            raise
 
 
 
167
 
168
  def merge_pdfs(paths, output_path):
169
  writer = PdfWriter()
@@ -175,61 +260,70 @@ def merge_pdfs(paths, output_path):
175
  writer.write(f)
176
  return output_path
177
 
178
def gemini_generate_content(model, prompt_text, file_upload_obj=None, image_obj=None):
    """
    Send a prompt, plus an optional uploaded file and image(s), to ``model``.

    Args:
        model: Object exposing ``generate_content(inputs)``.
        prompt_text: Prompt string; always the first input.
        file_upload_obj: Optional uploaded-file handle appended after the prompt.
        image_obj: Optional image or list of images. String entries are
            treated as file paths and opened with PIL; anything else is
            passed through unchanged.

    Returns:
        The response text. Falls back to the first candidate's first part,
        and finally to ``str(response)``, when no text is available.
    """
    inputs = [prompt_text]

    if file_upload_obj:
        inputs.append(file_upload_obj)

    if image_obj:
        # Accept a single image as well as a list of images.
        images = image_obj if isinstance(image_obj, list) else [image_obj]
        for item in images:
            inputs.append(Image.open(item) if isinstance(item, str) else item)

    print("πŸ“‘ Sending request to Gemini (prompt length:", len(prompt_text), "chars )")
    response = model.generate_content(inputs)

    # The ``text`` quick accessor raises ValueError (not AttributeError) when
    # the response holds no valid part, so ``getattr`` with a default is not
    # enough to reach the fallbacks below.
    try:
        raw_text = response.text
    except (AttributeError, ValueError):
        raw_text = None

    if not raw_text and getattr(response, "candidates", None):
        try:
            raw_text = response.candidates[0].content.parts[0].text
        except (AttributeError, IndexError, TypeError):
            # Candidate without usable parts: keep what we have and let the
            # ``str(response)`` fallback below handle the None case.
            pass

    if raw_text is None:
        raw_text = str(response)

    print("πŸ“₯ Received response (chars):", len(raw_text))
    return raw_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
  # ---------------- PARSERS ----------------
216
  def extract_question_ids_from_qpms(text: str):
217
- """Extract question IDs from QP+MS transcript.
218
- Two-step approach: explicit 'Question X' lines, then fallback numbered lists.
219
- Robust to hidden whitespace and simple unicode spaces."""
220
  print("πŸ”Ž Extracting question IDs from QP+MS transcript using regex...")
221
 
222
- # Normalize spaces/tabs/non-breaking spaces
223
  clean_text = text.replace("\u00A0", " ").replace("\t", " ")
224
 
225
- # Step 1: Look for explicit "Question X" lines
226
  primary_matches = re.findall(r"^\s*Question\s*[:\s]\s*([\dA-Za-z.()]+)", clean_text, re.MULTILINE)
227
  if primary_matches:
228
  print(f"βœ… Extracted {len(primary_matches)} question IDs from explicit 'Question X' lines.")
229
  print("IDs:", primary_matches)
230
  return primary_matches
231
 
232
- # Step 2: Fallback β€” numbered/sub-question lists
233
  fallback_matches = re.findall(r"^\s*(\d+(?:[.)]|\([a-zA-Z0-9]+\))?[a-zA-Z0-9]*)", clean_text, re.MULTILINE)
234
  if fallback_matches:
235
  print(f"βœ… Extracted {len(fallback_matches)} question IDs (fallback numbered lists).")
@@ -238,12 +332,9 @@ def extract_question_ids_from_qpms(text: str):
238
  print("⚠️ No question IDs extracted; will send NA placeholder.")
239
  return fallback_matches
240
 
241
- # Update AS prompt builder to include graph detection
242
-
243
  def build_as_prompt_with_expected_ids(expected_ids, qpms_text=None):
244
  """
245
  Construct the AS transcription prompt injecting the expected IDs block and graph detection instructions.
246
- If qpms_text is provided, instruct the LLM to refer to it for ambiguous handwriting.
247
  """
248
  if not expected_ids:
249
  ids_block = "{NA}"
@@ -276,8 +367,6 @@ AS:
276
  ==== GRAPH FOUND ANSWERS ====\nGraph found in:\n- Answer <number> β†’ Page <number>\n(one per line)\n==== END GRAPH FOUND ===="""
277
  return prompt
278
 
279
- # Robust parsing functions for graph detection
280
-
281
  def extract_graph_questions_from_ms(text: str):
282
  """Extract graph questions and page numbers from MS transcript."""
283
  clean_text = text.replace("\u00A0", " ").replace("\t", " ")
@@ -313,9 +402,7 @@ def extract_graph_answers_from_as(text: str):
313
 
314
  def extract_marks_from_grading(grading_text):
315
  """
316
- Parse the grading markdown produced by the GRADING_PROMPT and extract marks per question.
317
- Returns dict: {"grading": [{"question": "1.a", "marks_awarded": ["M1","A1"]}, ...]}
318
- Preserves all marks in order, including duplicates.
319
  """
320
  print("πŸ”Ž Extracting awarded marks from grading output...")
321
  grading_json = {"grading": []}
@@ -338,10 +425,9 @@ def extract_marks_from_grading(grading_text):
338
  return grading_json
339
 
340
  # ---------------- MAPPING/IMPRINT HELPERS ----------------
341
- def ask_gemini_for_mapping_batch(model, image_paths, grading_json, expected_ids=None, rows=GRID_ROWS, cols=GRID_COLS):
342
  """
343
  Send multiple page images together to Gemini for batch mapping processing.
344
- More efficient than sending one by one.
345
  """
346
  ids_block = "{NA}"
347
  if expected_ids:
@@ -363,23 +449,30 @@ Return JSON only, like:
363
  Grading JSON:
364
  {json.dumps(grading_json, indent=2)}"""
365
 
366
- # Load all images
367
  images = [Image.open(p) for p in image_paths]
368
 
369
  print(f"πŸ“‘ Sending batch mapping request for {len(image_paths)} pages to Gemini...")
370
- response = model.generate_content([prompt, *images])
371
 
372
- raw_text = getattr(response, "text", None)
373
- if not raw_text and getattr(response, "candidates", None):
374
- raw_text = response.candidates[0].content.parts[0].text
375
- if not raw_text:
376
- raw_text = str(response)
 
 
 
 
 
 
 
 
 
 
377
 
378
  print("πŸ“₯ Batch mapping response (chars):", len(raw_text))
379
  print("πŸ”Ž Gemini raw batch output:")
380
  print(raw_text)
381
 
382
- # Try to extract JSON from response
383
  try:
384
  match = re.search(r'(\[.*\])', raw_text, re.DOTALL)
385
  if match:
@@ -393,17 +486,16 @@ Grading JSON:
393
  print(f"❌ Failed to parse Gemini JSON mapping: {e}")
394
  return []
395
 
396
- def imprint_marks_using_mapping(pdf_path, grading_json, output_pdf, model, expected_ids=None, rows=GRID_ROWS, cols=GRID_COLS):
397
  """
398
  Convert PDF to images, create grid-numbered images for batch sending to Gemini,
399
- then annotate and produce imprinted PDF using batch processing for better efficiency.
400
  """
401
  print("πŸ“„ Converting answer PDF to images for imprinting...")
402
  pages = convert_from_path(pdf_path, dpi=200)
403
  annotated_page_paths = []
404
  temp_grid_images = []
405
 
406
- # Create grid images for Gemini
407
  for p_index, page in enumerate(pages):
408
  img = page.convert("RGB")
409
  w, h = img.size
@@ -432,18 +524,16 @@ def imprint_marks_using_mapping(pdf_path, grading_json, output_pdf, model, expec
432
  temp_grid_images.append(temp_path)
433
  print("πŸ›° Created grid image:", temp_path)
434
 
435
- # Send pages in batches to Gemini for mapping
436
  print("πŸ“‘ Sending page images to Gemini in batches for mapping...")
437
- batch_size = 10 # Process 10 pages at a time
438
  all_mappings = []
439
 
440
  for start in range(0, len(temp_grid_images), batch_size):
441
  batch_paths = temp_grid_images[start:start+batch_size]
442
- batch_mapping = ask_gemini_for_mapping_batch(model, batch_paths, grading_json, expected_ids, rows, cols)
443
  all_mappings.extend(batch_mapping)
444
  print(f"βœ… Processed batch {start//batch_size + 1}: pages {start+1}-{start+len(batch_paths)}")
445
 
446
- # Annotate original pages according to returned mappings
447
  print("πŸ–Š Annotating pages with marks...")
448
  for p_index, page in enumerate(pages):
449
  page_num = p_index + 1
@@ -453,7 +543,6 @@ def imprint_marks_using_mapping(pdf_path, grading_json, output_pdf, model, expec
453
  h, w, _ = img_cv.shape
454
  cell_w_px, cell_h_px = w / cols, h / rows
455
 
456
- # Filter mappings for this page
457
  page_mappings = [m for m in all_mappings if m.get("page") == page_num]
458
 
459
  for item in page_mappings:
@@ -472,11 +561,9 @@ def imprint_marks_using_mapping(pdf_path, grading_json, output_pdf, model, expec
472
  row = (cell_number - 1) // cols
473
  col = (cell_number - 1) % cols
474
 
475
- # Position marks to the right of the answer, with fallback to left
476
  x_c = int((col + 1) * cell_w_px - cell_w_px / 4)
477
  y_c = int((row + 0.5) * cell_h_px)
478
 
479
- # Use larger, more visible font
480
  font_scale = max(1.0, min(2.0, cell_h_px / 40.0))
481
  thickness = max(2, int(font_scale * 2))
482
  cv2.putText(img_cv, marks_text, (x_c, y_c), cv2.FONT_HERSHEY_SIMPLEX,
@@ -488,7 +575,6 @@ def imprint_marks_using_mapping(pdf_path, grading_json, output_pdf, model, expec
488
  annotated_page_paths.append(annotated_path)
489
  print("βœ… Annotated page saved:", annotated_path)
490
 
491
- # Merge annotated pages into final PDF
492
  print("πŸ“‘ Merging annotated pages into final PDF...")
493
  with open(output_pdf, "wb") as f:
494
  f.write(img2pdf.convert(annotated_page_paths))
@@ -497,21 +583,14 @@ def imprint_marks_using_mapping(pdf_path, grading_json, output_pdf, model, expec
497
  print("πŸ“‘ Imprinted PDF saved to:", compressed)
498
  return compressed
499
 
500
- # ---------------- GRAPH DETECTION HELPERS ----------------
501
- # These functions are now robustly handled by the new_code, so they are no longer needed here.
502
-
503
- # ---------------- GRAPH PAGE EXTRACTION HELPER ----------------
504
  def extract_pdf_pages_as_images(pdf_path, page_numbers, prefix):
505
  """
506
  Extracts unique pages (1-based) from a PDF as images, saves as PNG, returns list of file paths.
507
- Prints to console when extracting each page.
508
  """
509
  unique_pages = sorted(set(page_numbers))
510
  images = convert_from_path(pdf_path, dpi=200, first_page=min(unique_pages), last_page=max(unique_pages))
511
  out_paths = []
512
  for idx, page_num in enumerate(unique_pages):
513
- # pdf2image returns images in order, but if not contiguous, we need to map
514
- # So, get the image for this page (1-based)
515
  img_idx = page_num - min(unique_pages)
516
  img = images[img_idx]
517
  out_path = f"{prefix}_page_{page_num}.png"
@@ -520,42 +599,33 @@ def extract_pdf_pages_as_images(pdf_path, page_numbers, prefix):
520
  out_paths.append(out_path)
521
  return out_paths
522
 
523
- # ---------------- PIPELINE UPDATE FOR GRAPH-AWARE GRADING ----------------
524
  def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
525
  """
526
- Final pipeline implementing requested flow and verbose console logging.
527
- Now includes Graph-Aware Grading logic.
528
  """
529
  try:
530
  print("πŸ” Starting pipeline...")
531
- # Step 0: compress as needed
532
  qp_path = compress_pdf(qp_path)
533
  ms_path = compress_pdf(ms_path)
534
  ans_path = compress_pdf(ans_path)
535
 
536
- # Merge QP + MS
537
  merged_qpms_path = os.path.splitext(qp_path)[0] + "_merged_qp_ms.pdf"
538
  merge_pdfs([qp_path, ms_path], merged_qpms_path)
539
  print("πŸ“Ž Merged QP + MS ->", merged_qpms_path)
540
 
541
- # Upload files to Gemini
542
  print("πŸ”Ό Uploading files to Gemini...")
543
- merged_uploaded = genai.upload_file(path=merged_qpms_path, display_name="QP+MS (merged)")
544
- ans_uploaded = genai.upload_file(path=ans_path, display_name="Answer Sheet")
545
  print("βœ… Upload complete.")
546
 
547
- # Create model and print which selected
548
- model = create_model()
549
-
550
- # Step 1.i: QP+MS transcription (first)
551
  print("1.i) Transcribing QP+MS (questions first, then full markscheme, with graph detection)...")
552
  qpms_prompt = PROMPTS["QP_MS_TRANSCRIPTION"]["content"] + "\nAt the end, also list all questions in the markscheme where a graph is expected, in the format:\nGraph expected in:\n- Question <number> β†’ Page <number>\n(One per line, after ==== MARKSCHEME END ====)"
553
- qpms_text = gemini_generate_content(model, qpms_prompt, file_upload_obj=merged_uploaded)
554
  print("πŸ“„ QP+MS transcription received. Saving debug file: debug_qpms_transcript.txt")
555
  with open("debug_qpms_transcript.txt", "w", encoding="utf-8") as f:
556
  f.write(qpms_text)
557
 
558
- # Step 1.i.a: Extract graph-expected questions from MS
559
  ms_graph_mapping = extract_graph_questions_from_ms(qpms_text)
560
  print("πŸ–ΌοΈ Graph-expected questions in MS:", ms_graph_mapping)
561
  ms_graph_pages = list(ms_graph_mapping.values())
@@ -563,20 +633,17 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
563
  if ms_graph_pages:
564
  ms_graph_images = extract_pdf_pages_as_images(merged_qpms_path, ms_graph_pages, prefix="qpms_graph")
565
 
566
- # Step 2: extract serial numbers (question IDs) using regex from qpms_text
567
  extracted_ids = extract_question_ids_from_qpms(qpms_text)
568
  if not extracted_ids:
569
  extracted_ids = ["NA"]
570
 
571
- # Step 1.ii: Build AS prompt injecting extracted IDs and transcribe AS
572
  print("1.ii) Building AS transcription prompt with expected question IDs and graph detection, sending to Gemini...")
573
  as_prompt = build_as_prompt_with_expected_ids(extracted_ids, qpms_text) + "\nAt the end, also list all answers where a graph is found, in the format:\nGraph found in:\n- Answer <number> β†’ Page <number>\n(One per line, after all answers)"
574
- as_text = gemini_generate_content(model, as_prompt, file_upload_obj=ans_uploaded)
575
  print("πŸ“ AS transcription received. Saving debug file: debug_as_transcript.txt")
576
  with open("debug_as_transcript.txt", "w", encoding="utf-8") as f:
577
  f.write(as_text)
578
 
579
- # Step 2.a: Extract graph-attempted answers from AS
580
  as_graph_mapping = extract_graph_answers_from_as(as_text)
581
  print("πŸ–ΌοΈ Graph-attempted answers in AS:", as_graph_mapping)
582
  as_graph_pages = list(as_graph_mapping.values())
@@ -584,9 +651,6 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
584
  if as_graph_pages:
585
  as_graph_images = extract_pdf_pages_as_images(ans_path, as_graph_pages, prefix="as_graph")
586
 
587
- # Step 3: (No graph bundle matching, just collect images)
588
-
589
- # Step 4: Grading - send both transcripts to grading model, inject graph image info
590
  print("2) Preparing grading input and sending to Gemini for grading...")
591
  grading_input = (
592
  "=== QP+MS TRANSCRIPT BEGIN ===\n"
@@ -596,24 +660,20 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
596
  + as_text
597
  + "\n=== ANSWER SHEET TRANSCRIPT END ===\n"
598
  )
599
- # Inject graph image note
600
  if ms_graph_images or as_graph_images:
601
- graph_note = "\n\n---\nSome questions require graphs. I’ve attached the relevant graph pages from QP+MS and from the Answer Sheet. Use them as visual context when grading.\n---\n"
602
  grading_input += graph_note
603
  grading_prompt_system = PROMPTS["GRADING_PROMPT"]["content"]
604
- # Pass images as additional input to gemini_generate_content
605
  grading_images = ms_graph_images + as_graph_images
606
- grading_text = gemini_generate_content(model, grading_prompt_system + "\n\nPlease grade the following transcripts:\n" + grading_input, image_obj=grading_images if grading_images else None)
607
  print("🧾 Grading output received. Saving debug file: debug_grading.md")
608
  with open("debug_grading.md", "w", encoding="utf-8") as f:
609
  f.write(grading_text)
610
 
611
- # Save grading PDF
612
  base_name = os.path.splitext(os.path.basename(ans_path))[0]
613
  grading_pdf_path = save_as_pdf(grading_text, f"{base_name}_graded.pdf")
614
  print("πŸ“„ Grading PDF saved:", grading_pdf_path)
615
 
616
- # Step 4: Extract marks for imprinting
617
  grading_json = extract_marks_from_grading(grading_text)
618
  with open("debug_grading_json.json", "w", encoding="utf-8") as f:
619
  json.dump(grading_json, f, indent=2, ensure_ascii=False)
@@ -621,9 +681,9 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
621
 
622
  imprinted_pdf_path = None
623
  if imprint:
624
- print("✍ Imprint option enabled. Starting imprinting process (parallel mapping requests)...")
625
  imprinted_pdf_path = f"{base_name}_imprinted.pdf"
626
- imprinted_pdf_path = imprint_marks_using_mapping(ans_path, grading_json, imprinted_pdf_path, model, extracted_ids)
627
  print("βœ… Imprinting finished. Imprinted PDF at:", imprinted_pdf_path)
628
 
629
  print("🏁 Pipeline finished successfully.")
@@ -631,11 +691,23 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
631
 
632
  except Exception as e:
633
  print("❌ Pipeline error:", e)
 
 
634
  return f"❌ Error: {e}", None, None, None, None
635
 
636
  # ---------------- GRADIO UI ----------------
637
- with gr.Blocks(title=" AI Grading (Final Flow )") as demo:
638
- gr.Markdown("## πŸ“˜ AI Grading β€” Final Flow")
 
 
 
 
 
 
 
 
 
 
639
 
640
  with gr.Row():
641
  qp_file = gr.File(label="πŸ“„ Upload Question Paper (PDF)")
@@ -646,10 +718,17 @@ with gr.Blocks(title=" AI Grading (Final Flow )") as demo:
646
  run_button = gr.Button("πŸš€ Run Pipeline")
647
 
648
  with gr.Row():
649
- grading_pdf_file = gr.File(label="πŸ“₯ Download Grading PDF")
650
- imprint_pdf_file = gr.File(label="πŸ“₯ Download Imprinted PDF (Optional)")
 
 
 
 
651
 
652
  def run_pipeline(qp_file_obj, ms_file_obj, ans_file_obj, imprint_flag):
 
 
 
653
  qp_path = qp_file_obj.name
654
  ms_path = ms_file_obj.name
655
  ans_path = ans_file_obj.name
@@ -658,13 +737,13 @@ with gr.Blocks(title=" AI Grading (Final Flow )") as demo:
658
  qp_path, ms_path, ans_path, imprint=imprint_flag
659
  )
660
 
661
- return grading_pdf_path, imprinted_pdf_path
662
 
663
  run_button.click(
664
  fn=run_pipeline,
665
  inputs=[qp_file, ms_file, ans_file, imprint_toggle],
666
- outputs=[grading_pdf_file, imprint_pdf_file]
667
  )
668
 
669
  if __name__ == "__main__":
670
- demo.launch()
 
2
  import re
3
  import json
4
  import subprocess
 
5
  import time
6
  import img2pdf
7
  import gradio as gr
8
+ from google import genai # NEW SDK
9
+ import pypandoc
10
  from pdf2image import convert_from_path
11
  from PIL import Image, ImageDraw, ImageFont
12
  import cv2
13
  import numpy as np
 
14
  from PyPDF2 import PdfReader, PdfWriter
15
 
16
  # ---------------- CONFIG ----------------
17
+ # Create client with new SDK
18
+ client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
19
  GRID_ROWS, GRID_COLS = 20, 14
20
 
21
  # ---------------- PROMPTS ----------------
 
27
  TASK:
28
  1. Transcribe EXACTLY all the questions FIRST (with their total marks).
29
  2. After ALL questions, transcribe the Markscheme exactly, preserving M/A/R notation in brackets.
30
+ 3. Always number the questions sequentially (Question 1, Question 2, Question 3, …) **in the order they appear in the PDF**, even if the PDF shows a different number or leaves it blank. Do NOT skip or leave Question: blank. Never start a question other than question 1 (even if it is labelled in pdf as 8 name it 1).
31
+ 4. If a question or sub-question is labelled with a letter (e.g., "Q1.a", "Q2(b)", "1 (c)(i)"), transcribe it as "Question 1.a", "Question 2.b", "Question 1.c.i" etc., exactly preserving the hierarchy of sub-question identifiers.
32
+ 5. After the markscheme, DETECT and FLAG all questions in the markscheme where a graph/diagram is expected. For each, output the question number and the page number in the format below.
33
+
34
  FORMAT:
35
  ==== PAPER TOTAL MARKS ====
36
  <total marks>
37
+
38
  ==== QUESTIONS BEGIN ====
39
+ Question 1.a
40
  Total Marks: <number>
41
  QP: <question text>
42
  --QUESTION-END--
43
+
44
+ Question 1.b
45
+ Total Marks: <number>
46
+ QP: <question text>
47
+ --QUESTION-END--
48
+
49
+ Question 2
50
  Total Marks: <number>
51
  QP: <question text>
52
  --QUESTION-END--
53
+
54
  (repeat for all questions in order of appearance)
55
+
56
  ==== QUESTIONS END ====
57
+
58
  ==== MARKSCHEME BEGIN ====
59
+ Answer 1.a:
60
+ <exact MS for Q1.a with notations M1, A1, R1 etc>
61
+
62
+ Answer 1.b:
63
+ <exact MS for Q1.b with notations>
64
+
65
  Answer 2 :
66
  <exact MS for Q2 with notations>
67
+
68
  (repeat for all answers)
69
+
70
  ==== MARKSCHEME END ====
71
+
72
+ ==== GRAPH EXPECTED QUESTIONS ====
73
+ Graph expected in:
74
+ - Question <number> β†’ Page <number>
75
+ (one per line)
76
+ ==== END GRAPH EXPECTED ====
77
+ """
78
+ }
79
  ,
80
 
 
81
  "GRADING_PROMPT": {
82
  "role": "system",
83
  "content": """Developer: You are an official examiner. Apply the following grading rules precisely.
 
96
  4. Accept valid equivalent forms unless otherwise specified.
97
  5. Apply FT where appropriate.
98
  6. Use proper notation: M1A0, A1, etc.
99
+ 7. Any lost mark: use red `<span style=\"color:red\">M0</span>` , similarly make markscheme expected , student response and awarded marks in red include it in <span> tage
100
  ---
101
  ## Output Format
102
  Produce two sections per question/sub-question, following this structure:
103
  ## Question <id>
104
  ### Markscheme vs Student Answer
105
+ | Mark ID | Markscheme Expectation | Student's Response | Awarded |
106
  |---------|------------------------|--------------------|---------|
107
  | M1_1 | Recognise GP | "r=0.9" | M1 |
108
+ **Total: X/Y**
109
  ---
110
+ ### Examiner's Report
111
  At the very end, provide a summary table:
112
  | Question Number | Marks | Remark |
113
  |-----------------|-------|--------|
114
+ | 1 | X/Y | A |
115
+ | 2 | X/Y | B |
116
  Then show total clearly as a final line:
117
  `Total: <obtained_marks>/<max_marks>`
118
  NOTES:
119
  - The assistant will receive two transcripts: (1) QP+MS transcript (questions then markscheme) and (2) AS transcript (student answers). Use the QP+MS transcript as the authoritative source of question wording, total marks, and verbatim markscheme entries (M/A/R mark IDs).
120
  - Match student answers to question IDs and grade according to the provided verbatim markscheme.
121
  - For questions where a graph is expected and the student attempted a graph, you will be provided with the relevant markscheme and answer sheet graph images/pages. Use these for grading those questions with visual context. For all other questions, proceed as usual.
122
+ - Produce full markdown as above. Ensure mark IDs used in the grading are present and consistent with the markscheme.
123
+ - give grade in remark one of the following A : All Good B : Silly Mistake C : Conceptual Error D : Hard question E : Not Applicable
124
  """
125
  }
126
  }
127
 
128
  # ---------------- HELPERS ----------------
129
def save_as_pdf(text, filename="output.pdf"):
    """
    Convert markdown ``text`` to a PDF at ``filename`` using pandoc.

    Red ``<span style="color:red">...</span>`` markers are rewritten to bold
    bracketed text (``**[...]**``) before conversion. The xelatex engine is
    tried first; if pypandoc raises ``RuntimeError`` the conversion is retried
    with pdflatex. The temporary markdown file is removed whether or not the
    conversion succeeds.

    Args:
        text: Markdown source to render.
        filename: Destination PDF path.

    Returns:
        ``filename`` when the PDF was created. On any failure, the path of a
        ``.txt`` file (same base name) containing the unmodified ``text``.
    """
    temp_md = f"{filename}_temp.md"
    try:
        # Pandoc's LaTeX route does not render inline HTML colour spans,
        # so turn them into plain markdown emphasis.
        clean_text = re.sub(r'<span style="color:red">(.*?)</span>', r'**[\1]**', text)

        with open(temp_md, 'w', encoding='utf-8') as f:
            f.write(clean_text)

        print("πŸ“ Converting markdown to PDF using pandoc...")

        try:
            pypandoc.convert_file(
                temp_md, 'pdf',
                outputfile=filename,
                extra_args=[
                    '--pdf-engine=xelatex',
                    '-V', 'geometry:margin=0.75in',
                    '-V', 'fontsize=10pt',
                    '-V', 'linestretch=1.2',
                    '--standalone'
                ]
            )
        except RuntimeError:
            # Try with pdflatex if xelatex fails
            print("⚠️ xelatex failed, trying pdflatex...")
            pypandoc.convert_file(
                temp_md, 'pdf',
                outputfile=filename,
                extra_args=[
                    '--pdf-engine=pdflatex',
                    '-V', 'geometry:margin=0.75in',
                    '-V', 'fontsize=10pt'
                ]
            )

        # Verify the file was created
        if not os.path.exists(filename):
            raise Exception("PDF file was not created")

        size = os.path.getsize(filename)
        print(f"βœ… PDF saved successfully: {filename} ({size/1024:.1f} KB)")
        return filename

    except Exception as e:
        print(f"❌ PDF conversion error: {e}")
        print("πŸ’‘ Make sure pandoc is installed: https://pandoc.org/installing.html")
        print(" Ubuntu/Debian: sudo apt-get install pandoc texlive-xetex")
        print(" macOS: brew install pandoc basictex")
        print(" Windows: Download from https://pandoc.org/installing.html")

        # Fallback to text file. Swap only the final extension so a ".pdf"
        # elsewhere in the path (or a different-case suffix) is not mangled.
        txt_file = os.path.splitext(filename)[0] + '.txt'
        with open(txt_file, 'w', encoding='utf-8') as f:
            f.write(text)
        print(f"⚠️ Saved as text file instead: {txt_file}")
        return txt_file

    finally:
        # Best-effort cleanup on every path, including failed conversions.
        try:
            if os.path.exists(temp_md):
                os.remove(temp_md)
        except OSError:
            pass
194
 
195
  def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
196
  if output_path is None:
 
227
  print("❌ Compression error:", e)
228
  return input_path
229
 
230
def upload_to_gemini(path, display_name=None, poll_interval=2, timeout=600):
    """
    Upload a file to Gemini using the google-genai SDK and wait for processing.

    Args:
        path: Local path of the file to upload.
        display_name: Optional human-readable name. When given it is
            forwarded to the upload call via ``config``.
        poll_interval: Seconds to sleep between state checks while the file
            is in the ``PROCESSING`` state.
        timeout: Maximum seconds to wait for processing to finish; ``None``
            waits indefinitely.

    Returns:
        The uploaded file object once it has left the ``PROCESSING`` state.

    Raises:
        TimeoutError: Processing did not finish within ``timeout`` seconds.
        Exception: The file ended in the ``FAILED`` state, or the SDK raised;
            the error is logged and re-raised.
    """
    print(f"πŸ“€ Uploading {path} to Gemini...")
    try:
        if display_name is None:
            uploaded_file = client.files.upload(file=path)
        else:
            uploaded_file = client.files.upload(
                file=path, config={"display_name": display_name}
            )

        # Wait for processing to complete
        print(f"⏳ Waiting for file processing: {uploaded_file.name}")
        deadline = None if timeout is None else time.monotonic() + timeout
        while uploaded_file.state.name == "PROCESSING":
            # Bound the wait so a stuck file cannot hang the pipeline forever.
            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError(
                    f"File still processing after {timeout}s: {uploaded_file.name}"
                )
            time.sleep(poll_interval)
            uploaded_file = client.files.get(name=uploaded_file.name)

        if uploaded_file.state.name == "FAILED":
            raise Exception(f"File processing failed: {uploaded_file.name}")

        print(f"βœ… Uploaded and processed: {uploaded_file.name}")
        return uploaded_file
    except Exception as e:
        print(f"❌ Upload failed for {path}: {e}")
        raise
252
 
253
  def merge_pdfs(paths, output_path):
254
  writer = PdfWriter()
 
260
  writer.write(f)
261
  return output_path
262
 
263
def _generate_text_once(model_name, contents):
    """Run one generate_content call on ``model_name`` and return its text."""
    response = client.models.generate_content(
        model=model_name,
        contents=contents
    )
    raw_text = response.text
    if raw_text is None:
        # Fail with a clear message instead of a TypeError from len(None).
        raise RuntimeError(f"Gemini model {model_name} returned no text")
    print("πŸ“₯ Received response (chars):", len(raw_text))
    return raw_text


def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None,
                            model_name="gemini-2.0-flash-exp",
                            fallback_model="gemini-1.5-flash"):
    """
    Send a prompt, plus an optional uploaded file and image(s), to Gemini.

    Args:
        prompt_text: Prompt string; always the first content item.
        file_upload_obj: Optional uploaded-file handle appended after the prompt.
        image_obj: Optional image or list of images. String entries are
            treated as file paths and opened with PIL; anything else is
            passed through unchanged.
        model_name: Model tried first.
        fallback_model: Model tried if the first attempt raises or returns
            no text. Pass ``None`` to disable the fallback.

    Returns:
        The response text of the first model that produced any.

    Raises:
        Exception: Whatever the last attempted model raised (including
            ``RuntimeError`` for a response without text).
    """
    contents = [prompt_text]

    if file_upload_obj:
        contents.append(file_upload_obj)

    if image_obj:
        # Accept a single image as well as a list of images.
        images = image_obj if isinstance(image_obj, list) else [image_obj]
        for item in images:
            contents.append(Image.open(item) if isinstance(item, str) else item)

    print("πŸ“‘ Sending request to Gemini (prompt length:", len(prompt_text), "chars )")

    try:
        return _generate_text_once(model_name, contents)
    except Exception as e:
        print(f"❌ Generation failed: {e}")
        if not fallback_model:
            raise
        # Try fallback model
        print(f"⚑ Trying fallback model: {fallback_model}")
        try:
            return _generate_text_once(fallback_model, contents)
        except Exception as e2:
            print(f"❌ Fallback also failed: {e2}")
            raise
313
 
314
  # ---------------- PARSERS ----------------
315
  def extract_question_ids_from_qpms(text: str):
316
+ """Extract question IDs from QP+MS transcript."""
 
 
317
  print("πŸ”Ž Extracting question IDs from QP+MS transcript using regex...")
318
 
 
319
  clean_text = text.replace("\u00A0", " ").replace("\t", " ")
320
 
 
321
  primary_matches = re.findall(r"^\s*Question\s*[:\s]\s*([\dA-Za-z.()]+)", clean_text, re.MULTILINE)
322
  if primary_matches:
323
  print(f"βœ… Extracted {len(primary_matches)} question IDs from explicit 'Question X' lines.")
324
  print("IDs:", primary_matches)
325
  return primary_matches
326
 
 
327
  fallback_matches = re.findall(r"^\s*(\d+(?:[.)]|\([a-zA-Z0-9]+\))?[a-zA-Z0-9]*)", clean_text, re.MULTILINE)
328
  if fallback_matches:
329
  print(f"βœ… Extracted {len(fallback_matches)} question IDs (fallback numbered lists).")
 
332
  print("⚠️ No question IDs extracted; will send NA placeholder.")
333
  return fallback_matches
334
 
 
 
335
  def build_as_prompt_with_expected_ids(expected_ids, qpms_text=None):
336
  """
337
  Construct the AS transcription prompt injecting the expected IDs block and graph detection instructions.
 
338
  """
339
  if not expected_ids:
340
  ids_block = "{NA}"
 
367
  ==== GRAPH FOUND ANSWERS ====\nGraph found in:\n- Answer <number> β†’ Page <number>\n(one per line)\n==== END GRAPH FOUND ===="""
368
  return prompt
369
 
 
 
370
  def extract_graph_questions_from_ms(text: str):
371
  """Extract graph questions and page numbers from MS transcript."""
372
  clean_text = text.replace("\u00A0", " ").replace("\t", " ")
 
402
 
403
  def extract_marks_from_grading(grading_text):
404
  """
405
+ Parse the grading markdown and extract marks per question.
 
 
406
  """
407
  print("πŸ”Ž Extracting awarded marks from grading output...")
408
  grading_json = {"grading": []}
 
425
  return grading_json
426
 
427
  # ---------------- MAPPING/IMPRINT HELPERS ----------------
428
+ def ask_gemini_for_mapping_batch(image_paths, grading_json, expected_ids=None, rows=GRID_ROWS, cols=GRID_COLS):
429
  """
430
  Send multiple page images together to Gemini for batch mapping processing.
 
431
  """
432
  ids_block = "{NA}"
433
  if expected_ids:
 
449
  Grading JSON:
450
  {json.dumps(grading_json, indent=2)}"""
451
 
 
452
  images = [Image.open(p) for p in image_paths]
453
 
454
  print(f"πŸ“‘ Sending batch mapping request for {len(image_paths)} pages to Gemini...")
 
455
 
456
+ try:
457
+ contents = [prompt] + images
458
+ response = client.models.generate_content(
459
+ model="gemini-2.0-flash-exp",
460
+ contents=contents
461
+ )
462
+ raw_text = response.text
463
+ except:
464
+ print("⚠️ Trying fallback model for mapping...")
465
+ contents = [prompt] + images
466
+ response = client.models.generate_content(
467
+ model="gemini-1.5-flash",
468
+ contents=contents
469
+ )
470
+ raw_text = response.text
471
 
472
  print("πŸ“₯ Batch mapping response (chars):", len(raw_text))
473
  print("πŸ”Ž Gemini raw batch output:")
474
  print(raw_text)
475
 
 
476
  try:
477
  match = re.search(r'(\[.*\])', raw_text, re.DOTALL)
478
  if match:
 
486
  print(f"❌ Failed to parse Gemini JSON mapping: {e}")
487
  return []
488
 
489
+ def imprint_marks_using_mapping(pdf_path, grading_json, output_pdf, expected_ids=None, rows=GRID_ROWS, cols=GRID_COLS):
490
  """
491
  Convert PDF to images, create grid-numbered images for batch sending to Gemini,
492
+ then annotate and produce imprinted PDF.
493
  """
494
  print("πŸ“„ Converting answer PDF to images for imprinting...")
495
  pages = convert_from_path(pdf_path, dpi=200)
496
  annotated_page_paths = []
497
  temp_grid_images = []
498
 
 
499
  for p_index, page in enumerate(pages):
500
  img = page.convert("RGB")
501
  w, h = img.size
 
524
  temp_grid_images.append(temp_path)
525
  print("πŸ›° Created grid image:", temp_path)
526
 
 
527
  print("πŸ“‘ Sending page images to Gemini in batches for mapping...")
528
+ batch_size = 10
529
  all_mappings = []
530
 
531
  for start in range(0, len(temp_grid_images), batch_size):
532
  batch_paths = temp_grid_images[start:start+batch_size]
533
+ batch_mapping = ask_gemini_for_mapping_batch(batch_paths, grading_json, expected_ids, rows, cols)
534
  all_mappings.extend(batch_mapping)
535
  print(f"βœ… Processed batch {start//batch_size + 1}: pages {start+1}-{start+len(batch_paths)}")
536
 
 
537
  print("πŸ–Š Annotating pages with marks...")
538
  for p_index, page in enumerate(pages):
539
  page_num = p_index + 1
 
543
  h, w, _ = img_cv.shape
544
  cell_w_px, cell_h_px = w / cols, h / rows
545
 
 
546
  page_mappings = [m for m in all_mappings if m.get("page") == page_num]
547
 
548
  for item in page_mappings:
 
561
  row = (cell_number - 1) // cols
562
  col = (cell_number - 1) % cols
563
 
 
564
  x_c = int((col + 1) * cell_w_px - cell_w_px / 4)
565
  y_c = int((row + 0.5) * cell_h_px)
566
 
 
567
  font_scale = max(1.0, min(2.0, cell_h_px / 40.0))
568
  thickness = max(2, int(font_scale * 2))
569
  cv2.putText(img_cv, marks_text, (x_c, y_c), cv2.FONT_HERSHEY_SIMPLEX,
 
575
  annotated_page_paths.append(annotated_path)
576
  print("βœ… Annotated page saved:", annotated_path)
577
 
 
578
  print("πŸ“‘ Merging annotated pages into final PDF...")
579
  with open(output_pdf, "wb") as f:
580
  f.write(img2pdf.convert(annotated_page_paths))
 
583
  print("πŸ“‘ Imprinted PDF saved to:", compressed)
584
  return compressed
585
 
 
 
 
 
586
  def extract_pdf_pages_as_images(pdf_path, page_numbers, prefix):
587
  """
588
  Extracts unique pages (1-based) from a PDF as images, saves as PNG, returns list of file paths.
 
589
  """
590
  unique_pages = sorted(set(page_numbers))
591
  images = convert_from_path(pdf_path, dpi=200, first_page=min(unique_pages), last_page=max(unique_pages))
592
  out_paths = []
593
  for idx, page_num in enumerate(unique_pages):
 
 
594
  img_idx = page_num - min(unique_pages)
595
  img = images[img_idx]
596
  out_path = f"{prefix}_page_{page_num}.png"
 
599
  out_paths.append(out_path)
600
  return out_paths
601
 
602
+ # ---------------- PIPELINE ----------------
603
  def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
604
  """
605
+ Final pipeline with graph-aware grading logic using NEW SDK.
 
606
  """
607
  try:
608
  print("πŸ” Starting pipeline...")
 
609
  qp_path = compress_pdf(qp_path)
610
  ms_path = compress_pdf(ms_path)
611
  ans_path = compress_pdf(ans_path)
612
 
 
613
  merged_qpms_path = os.path.splitext(qp_path)[0] + "_merged_qp_ms.pdf"
614
  merge_pdfs([qp_path, ms_path], merged_qpms_path)
615
  print("πŸ“Ž Merged QP + MS ->", merged_qpms_path)
616
 
 
617
  print("πŸ”Ό Uploading files to Gemini...")
618
+ merged_uploaded = upload_to_gemini(merged_qpms_path)
619
+ ans_uploaded = upload_to_gemini(ans_path)
620
  print("βœ… Upload complete.")
621
 
 
 
 
 
622
  print("1.i) Transcribing QP+MS (questions first, then full markscheme, with graph detection)...")
623
  qpms_prompt = PROMPTS["QP_MS_TRANSCRIPTION"]["content"] + "\nAt the end, also list all questions in the markscheme where a graph is expected, in the format:\nGraph expected in:\n- Question <number> β†’ Page <number>\n(One per line, after ==== MARKSCHEME END ====)"
624
+ qpms_text = gemini_generate_content(qpms_prompt, file_upload_obj=merged_uploaded)
625
  print("πŸ“„ QP+MS transcription received. Saving debug file: debug_qpms_transcript.txt")
626
  with open("debug_qpms_transcript.txt", "w", encoding="utf-8") as f:
627
  f.write(qpms_text)
628
 
 
629
  ms_graph_mapping = extract_graph_questions_from_ms(qpms_text)
630
  print("πŸ–ΌοΈ Graph-expected questions in MS:", ms_graph_mapping)
631
  ms_graph_pages = list(ms_graph_mapping.values())
 
633
  if ms_graph_pages:
634
  ms_graph_images = extract_pdf_pages_as_images(merged_qpms_path, ms_graph_pages, prefix="qpms_graph")
635
 
 
636
  extracted_ids = extract_question_ids_from_qpms(qpms_text)
637
  if not extracted_ids:
638
  extracted_ids = ["NA"]
639
 
 
640
  print("1.ii) Building AS transcription prompt with expected question IDs and graph detection, sending to Gemini...")
641
  as_prompt = build_as_prompt_with_expected_ids(extracted_ids, qpms_text) + "\nAt the end, also list all answers where a graph is found, in the format:\nGraph found in:\n- Answer <number> β†’ Page <number>\n(One per line, after all answers)"
642
+ as_text = gemini_generate_content(as_prompt, file_upload_obj=ans_uploaded)
643
  print("πŸ“ AS transcription received. Saving debug file: debug_as_transcript.txt")
644
  with open("debug_as_transcript.txt", "w", encoding="utf-8") as f:
645
  f.write(as_text)
646
 
 
647
  as_graph_mapping = extract_graph_answers_from_as(as_text)
648
  print("πŸ–ΌοΈ Graph-attempted answers in AS:", as_graph_mapping)
649
  as_graph_pages = list(as_graph_mapping.values())
 
651
  if as_graph_pages:
652
  as_graph_images = extract_pdf_pages_as_images(ans_path, as_graph_pages, prefix="as_graph")
653
 
 
 
 
654
  print("2) Preparing grading input and sending to Gemini for grading...")
655
  grading_input = (
656
  "=== QP+MS TRANSCRIPT BEGIN ===\n"
 
660
  + as_text
661
  + "\n=== ANSWER SHEET TRANSCRIPT END ===\n"
662
  )
 
663
  if ms_graph_images or as_graph_images:
664
+ graph_note = "\n\n---\nSome questions require graphs. I've attached the relevant graph pages from QP+MS and from the Answer Sheet. Use them as visual context when grading.\n---\n"
665
  grading_input += graph_note
666
  grading_prompt_system = PROMPTS["GRADING_PROMPT"]["content"]
 
667
  grading_images = ms_graph_images + as_graph_images
668
+ grading_text = gemini_generate_content(grading_prompt_system + "\n\nPlease grade the following transcripts:\n" + grading_input, image_obj=grading_images if grading_images else None)
669
  print("🧾 Grading output received. Saving debug file: debug_grading.md")
670
  with open("debug_grading.md", "w", encoding="utf-8") as f:
671
  f.write(grading_text)
672
 
 
673
  base_name = os.path.splitext(os.path.basename(ans_path))[0]
674
  grading_pdf_path = save_as_pdf(grading_text, f"{base_name}_graded.pdf")
675
  print("πŸ“„ Grading PDF saved:", grading_pdf_path)
676
 
 
677
  grading_json = extract_marks_from_grading(grading_text)
678
  with open("debug_grading_json.json", "w", encoding="utf-8") as f:
679
  json.dump(grading_json, f, indent=2, ensure_ascii=False)
 
681
 
682
  imprinted_pdf_path = None
683
  if imprint:
684
+ print("✍ Imprint option enabled. Starting imprinting process...")
685
  imprinted_pdf_path = f"{base_name}_imprinted.pdf"
686
+ imprinted_pdf_path = imprint_marks_using_mapping(ans_path, grading_json, imprinted_pdf_path, extracted_ids)
687
  print("βœ… Imprinting finished. Imprinted PDF at:", imprinted_pdf_path)
688
 
689
  print("🏁 Pipeline finished successfully.")
 
691
 
692
  except Exception as e:
693
  print("❌ Pipeline error:", e)
694
+ import traceback
695
+ traceback.print_exc()
696
  return f"❌ Error: {e}", None, None, None, None
697
 
698
  # ---------------- GRADIO UI ----------------
699
+ with gr.Blocks(title="AI Grading (Fixed - Pandoc PDF)") as demo:
700
+ gr.Markdown("## πŸ“˜ AI Grading β€” Fixed with Pandoc PDF Conversion")
701
+ gr.Markdown("""
702
+ **βœ… Now using pypandoc for PDF conversion (no truncation issues!)**
703
+
704
+ ### Requirements:
705
+ - Install: `pip install pypandoc`
706
+ - Install pandoc system-wide:
707
+ - **Ubuntu/Debian**: `sudo apt-get install pandoc texlive-xetex`
708
+ - **macOS**: `brew install pandoc basictex`
709
+ - **Windows**: Download from https://pandoc.org/installing.html
710
+ """)
711
 
712
  with gr.Row():
713
  qp_file = gr.File(label="πŸ“„ Upload Question Paper (PDF)")
 
718
  run_button = gr.Button("πŸš€ Run Pipeline")
719
 
720
  with gr.Row():
721
+ qpms_box = gr.Textbox(label="πŸ“‘ QP+MS Transcript", lines=12)
722
+ as_box = gr.Textbox(label="πŸ“ AS Transcript", lines=12)
723
+
724
+ grading_output_box = gr.Textbox(label="🧾 Grading (Markdown)", lines=20)
725
+ grading_pdf_file = gr.File(label="πŸ“₯ Download Grading PDF")
726
+ imprint_pdf_file = gr.File(label="πŸ“₯ Download Imprinted PDF (Optional)")
727
 
728
  def run_pipeline(qp_file_obj, ms_file_obj, ans_file_obj, imprint_flag):
729
+ if not qp_file_obj or not ms_file_obj or not ans_file_obj:
730
+ return "❌ Please upload all three files", "", "", None, None
731
+
732
  qp_path = qp_file_obj.name
733
  ms_path = ms_file_obj.name
734
  ans_path = ans_file_obj.name
 
737
  qp_path, ms_path, ans_path, imprint=imprint_flag
738
  )
739
 
740
+ return qpms_text or "", as_text or "", grading_text or "", grading_pdf_path, imprinted_pdf_path
741
 
742
  run_button.click(
743
  fn=run_pipeline,
744
  inputs=[qp_file, ms_file, ans_file, imprint_toggle],
745
+ outputs=[qpms_box, as_box, grading_output_box, grading_pdf_file, imprint_pdf_file]
746
  )
747
 
748
  if __name__ == "__main__":
749
+ demo.launch()