atz21 committed on
Commit
d7fa86e
·
verified ·
1 Parent(s): 8463bc7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +390 -403
app.py CHANGED
@@ -6,12 +6,12 @@ import time
6
  import img2pdf
7
  import gradio as gr
8
  from google import genai # NEW SDK
9
- from markdown_pdf import MarkdownPdf, Section
10
  from pdf2image import convert_from_path
11
  from PIL import Image, ImageDraw, ImageFont
12
  import cv2
13
  import numpy as np
14
  from PyPDF2 import PdfReader, PdfWriter
 
15
 
16
  # ---------------- CONFIG ----------------
17
  # Create client with new SDK
@@ -19,279 +19,311 @@ client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
19
  GRID_ROWS, GRID_COLS = 20, 14
20
 
21
  # ---------------- PROMPTS ----------------
22
- PROMPTS = {
23
- "QP_MS_TRANSCRIPTION" : {
24
- "role": "system",
25
- "content": """You are a high-quality OCR/Transcription assistant.
26
- INPUT: This file is a PDF that first contains the Question Paper and immediately after it the Markscheme.
27
- TASK:
28
- 1. Transcribe EXACTLY all the questions FIRST (with their total marks).
29
- 2. After ALL questions, transcribe the Markscheme exactly, preserving M/A/R notation in brackets.
30
- 3. Always number the questions sequentially (Question 1, Question 2, Question 3, …) **in the order they appear in the PDF**, even if the PDF shows a different number or leaves it blank. Do NOT skip or leave Question: blank. Never start a question other than question 1 (even if it is labelled in pdf as 8 name it 1).
31
- 4. If a question or sub-question is labelled with a letter (e.g., "Q1.a", "Q2(b)", "1 (c)(i)"), transcribe it as "Question 1.a", "Question 2.b", "Question 1.c.i" etc., exactly preserving the hierarchy of sub-question identifiers.
32
- 5. After the markscheme, DETECT and FLAG all questions in the markscheme where a graph/diagram is expected. For each, output the question number and the page number in the format below.
33
-
34
- FORMAT:
35
- ==== PAPER TOTAL MARKS ====
36
- <total marks>
37
-
38
- ==== QUESTIONS BEGIN ====
39
- Question 1.a
40
- Total Marks: <number>
41
- QP: <question text>
42
- --QUESTION-END--
43
-
44
- Question 1.b
45
- Total Marks: <number>
46
- QP: <question text>
47
- --QUESTION-END--
48
-
49
- Question 2
50
- Total Marks: <number>
51
- QP: <question text>
52
- --QUESTION-END--
53
-
54
- (repeat for all questions in order of appearance)
55
-
56
- ==== QUESTIONS END ====
57
-
58
- ==== MARKSCHEME BEGIN ====
59
- Answer 1.a:
60
- <exact MS for Q1.a with notations M1, A1, R1 etc>
61
-
62
- Answer 1.b:
63
- <exact MS for Q1.b with notations>
64
-
65
- Answer 2 :
66
- <exact MS for Q2 with notations>
67
-
68
- (repeat for all answers)
69
-
70
- ==== MARKSCHEME END ====
71
-
72
- ==== GRAPH EXPECTED QUESTIONS ====
73
- Graph expected in:
74
- - Question <number> β†’ Page <number>
75
- (one per line)
76
- ==== END GRAPH EXPECTED ====
77
- """
78
- }
79
- ,
80
-
81
- "GRADING_PROMPT": {
82
- "role": "system",
83
- "content": """Developer: You are an official examiner. Apply the following grading rules precisely.
84
- ### Abbreviations:
85
- - **M**: Marks for Method
86
- - **A**: Marks for Accuracy/Answer
87
- - **R**: Marks for Reasoning
88
- - **AG**: Answer given in questionβ€”no marks
89
- - **FT**: Follow Through marks (if error carried forward correctly)
90
- - **MR**: Deduct for misread (once only)
91
- ---
92
- ## Grading Instructions
93
- 1. Award marks using official annotations (e.g., M1, A2).
94
- 2. Do not award full marks for answers alone; check for method marks.
95
- 3. A marks usually require a valid M mark first.
96
- 4. Accept valid equivalent forms unless otherwise specified.
97
- 5. Apply FT where appropriate.
98
- 6. Use proper notation: M1A0, A1, etc.
99
- 7. Any lost mark: use red `<span style=\"color:red\">M0</span>` , similarly make markscheme expected , student response and awarded marks in red include it in <span> tage
100
- ---
101
- ## Output Format
102
- Produce two sections per question/sub-question, following this structure:
103
- ## Question <id>
104
- ### Markscheme vs Student Answer
105
- | Mark ID | Markscheme Expectation | Student's Response | Awarded |
106
- |---------|------------------------|--------------------|---------|
107
- | M1_1 | Recognise GP | "r=0.9" | M1 |
108
- **Total: X/Y**
109
- ---
110
- ### Examiner's Report
111
- At the very end, provide a summary table:
112
- | Question Number | Marks | Remark |
113
- |-----------------|-------|--------|
114
- | 1 | X/Y | A |
115
- | 2 | X/Y | B |
116
- Then show total clearly as a final line:
117
- `Total: <obtained_marks>/<max_marks>`
118
- NOTES:
119
- - The assistant will receive two transcripts: (1) QP+MS transcript (questions then markscheme) and (2) AS transcript (student answers). Use the QP+MS transcript as the authoritative source of question wording, total marks, and verbatim markscheme entries (M/A/R mark IDs).
120
- - Match student answers to question IDs and grade according to the provided verbatim markscheme.
121
- - For questions where a graph is expected and the student attempted a graph, you will be provided with the relevant markscheme and answer sheet graph images/pages. Use these for grading those questions with visual context. For all other questions, proceed as usual.
122
- - Produce full markdown as above. Ensure mark IDs used in the grading are present and consistent with the markscheme.
123
- - give grade in remark one of the following A : All Good B : Silly Mistake C : Conceptual Error D : Hard question E : Not Applicable
124
- """
125
- }
126
- }
127
 
128
  # ---------------- HELPERS ----------------
129
- def save_as_pdf(text, filename="output.pdf"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  """
131
- Convert markdown to PDF using markdown_pdf library.
132
- Enhanced with error handling, content cleaning, and fallback mechanisms.
 
 
 
 
 
 
 
 
 
 
 
133
  """
 
 
 
 
134
  try:
135
- print(f"πŸ“„ Starting PDF conversion for {filename}...")
136
-
137
- # ============ STEP 1: Clean and prepare the text ============
138
- print("🧹 Cleaning markdown content...")
139
-
140
- # Remove or replace HTML tags that markdown_pdf can't handle
141
- clean_text = text
142
-
143
- # Replace red color spans with bold markdown (** for bold)
144
- clean_text = re.sub(r'<span\s+style\s*=\s*["\']color\s*:\s*red["\']>(.*?)</span>',
145
- r'**\1**', clean_text, flags=re.IGNORECASE)
146
-
147
- # Remove any other HTML tags
148
- clean_text = re.sub(r'<[^>]+>', '', clean_text)
149
-
150
- # Fix unicode issues
151
- clean_text = clean_text.replace('\u00A0', ' ') # Non-breaking space
152
- clean_text = clean_text.replace('\u2013', '-') # En dash
153
- clean_text = clean_text.replace('\u2014', '--') # Em dash
154
- clean_text = clean_text.replace('\u2019', "'") # Right single quote
155
- clean_text = clean_text.replace('\u201C', '"') # Left double quote
156
- clean_text = clean_text.replace('\u201D', '"') # Right double quote
157
-
158
- # Ensure proper line spacing for tables
159
- clean_text = re.sub(r'\n\|', r'\n\n|', clean_text)
160
- clean_text = re.sub(r'\|\n', r'|\n\n', clean_text)
161
-
162
- # Remove excessive blank lines (more than 2)
163
- clean_text = re.sub(r'\n{3,}', '\n\n', clean_text)
164
-
165
- print(f"βœ… Text cleaned. Length: {len(clean_text)} characters")
166
-
167
- # ============ STEP 2: Save cleaned text to debug file ============
168
- debug_file = filename.replace('.pdf', '_cleaned.md')
169
- try:
170
- with open(debug_file, 'w', encoding='utf-8') as f:
171
- f.write(clean_text)
172
- print(f"πŸ“ Saved cleaned markdown to: {debug_file}")
173
- except Exception as e:
174
- print(f"⚠️ Warning: Could not save debug file: {e}")
175
 
176
- # ============ STEP 3: Create PDF with optimal settings ============
177
- print("πŸ”§ Configuring PDF generator...")
178
-
179
- # Initialize MarkdownPdf with minimal TOC
180
- pdf = MarkdownPdf(toc_level=0) # Disable table of contents
181
-
182
- # Set metadata
183
- pdf.meta = {
184
- "title": "Grading Report",
185
- "author": "AI Grading System",
186
- "subject": "Student Assessment"
187
- }
188
-
189
- # Add the content as a section
190
- print("πŸ“‘ Adding content to PDF...")
191
- pdf.add_section(Section(clean_text, toc=False))
192
-
193
- # ============ STEP 4: Save the PDF ============
194
- print(f"πŸ’Ύ Saving PDF to {filename}...")
195
- pdf.save(filename)
196
-
197
- # ============ STEP 5: Verify the PDF was created ============
198
- if os.path.exists(filename):
199
- file_size = os.path.getsize(filename)
200
- print(f"βœ… PDF created successfully!")
201
- print(f"πŸ“Š File size: {file_size / 1024:.2f} KB")
202
-
203
- # Check if file is suspiciously small (might indicate truncation)
204
- if file_size < 10000: # Less than 10KB
205
- print(f"⚠️ Warning: PDF file is very small ({file_size} bytes)")
206
- print(" This might indicate content was truncated.")
207
- print(" Check the PDF file manually.")
208
-
209
- return filename
210
  else:
211
- raise FileNotFoundError(f"PDF file was not created: {filename}")
212
-
213
- except Exception as e:
214
- print(f"❌ PDF generation failed: {e}")
215
- print(f" Error type: {type(e).__name__}")
216
-
217
- import traceback
218
- traceback.print_exc()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
220
- # ============ FALLBACK: Save as Markdown ============
221
- print("πŸ”„ Attempting fallback: Saving as Markdown file...")
222
- try:
223
- md_filename = filename.replace('.pdf', '.md')
224
- with open(md_filename, 'w', encoding='utf-8') as f:
225
- f.write(clean_text if 'clean_text' in locals() else text)
226
- print(f"βœ… Saved as Markdown file: {md_filename}")
227
- print(" You can manually convert this to PDF using an online tool.")
228
- return md_filename
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
 
230
- except Exception as fallback_error:
231
- print(f"❌ Fallback also failed: {fallback_error}")
 
 
 
232
 
233
- # ============ LAST RESORT: Save as plain text ============
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  try:
235
- txt_filename = filename.replace('.pdf', '.txt')
236
- with open(txt_filename, 'w', encoding='utf-8') as f:
237
- f.write(text) # Use original text
238
- print(f"βœ… Saved as text file: {txt_filename}")
239
- return txt_filename
240
-
241
- except Exception as final_error:
242
- print(f"❌ All save attempts failed: {final_error}")
243
- raise Exception("Could not save output in any format") from e
244
-
245
- def save_as_pdf_with_split(text, filename="output.pdf", max_questions=20):
246
- """
247
- Save as PDF, splitting into multiple files if content is too large.
248
- """
249
- try:
250
- # First, try to save normally
251
- return save_as_pdf(text, filename)
252
-
253
- except Exception as e:
254
- print(f"⚠️ Normal save failed, attempting to split document...")
255
-
256
- # Split by questions
257
- question_blocks = re.split(r'(## Question \d+(?:\.[a-z]+)?)', text)
258
-
259
- if len(question_blocks) <= 3: # Not enough to split
260
- raise e
261
-
262
- # Reconstruct questions with headers
263
- questions = []
264
- for i in range(1, len(question_blocks), 2):
265
- if i+1 < len(question_blocks):
266
- questions.append(question_blocks[i] + question_blocks[i+1])
267
-
268
- print(f"πŸ“Š Found {len(questions)} questions to split")
269
-
270
- # Split into chunks
271
- chunk_size = max_questions
272
- pdf_files = []
273
-
274
- for chunk_idx in range(0, len(questions), chunk_size):
275
- chunk = questions[chunk_idx:chunk_idx + chunk_size]
276
- chunk_text = "\n\n".join(chunk)
277
-
278
- # Add header and footer
279
- chunk_header = f"# Grading Report - Part {chunk_idx//chunk_size + 1}\n\n"
280
- chunk_text = chunk_header + chunk_text
281
 
282
- # Save chunk
283
- base_name = filename.replace('.pdf', '')
284
- chunk_filename = f"{base_name}_part{chunk_idx//chunk_size + 1}.pdf"
 
 
 
 
 
 
 
 
 
285
 
286
- print(f"πŸ’Ύ Saving part {chunk_idx//chunk_size + 1}...")
287
- save_as_pdf(chunk_text, chunk_filename)
288
- pdf_files.append(chunk_filename)
289
-
290
- print(f"βœ… Document split into {len(pdf_files)} parts:")
291
- for pdf_file in pdf_files:
292
- print(f" πŸ“„ {pdf_file}")
293
-
294
- return pdf_files[0] # Return first part
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
 
296
  def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
297
  if output_path is None:
@@ -433,41 +465,81 @@ def extract_question_ids_from_qpms(text: str):
433
  print("⚠️ No question IDs extracted; will send NA placeholder.")
434
  return fallback_matches
435
 
436
- def build_as_prompt_with_expected_ids(expected_ids, qpms_text=None):
437
  """
438
- Construct the AS transcription prompt injecting the expected IDs block and graph detection instructions.
 
 
 
439
  """
 
440
  if not expected_ids:
441
  ids_block = "{NA}"
442
  else:
443
  ids_block = "{\n" + "\n".join(expected_ids) + "\n}"
444
- refer_text = ""
445
- if qpms_text:
446
- refer_text = (
447
- "\nYou are also provided with the full transcript of the Question Paper and Markscheme (QP+MS). "
448
- "If you encounter ambiguous handwriting (for example, if a number could be '-1.6' or '1.6'), refer to the QP+MS transcript to infer the student's intended answer. "
449
- "However, if you are confident in your transcription, you may use your own judgment. "
450
- "Always prioritize accuracy and context from the QP+MS transcript when in doubt.\n"
 
 
451
  )
452
- prompt = f"""You are a high-quality handwritten transcription assistant.
453
- INPUT: This PDF contains a student's handwritten answer sheet.{refer_text}
454
- TASK: Transcribe the student's answers exactly (as text). Preserve step order and line breaks. Attempt to assign each answer to a question ID if the student has labelled it (e.g., "1", "1a", "2(b)", "3"). If the student hasn't labelled answers, segment contiguous answer blocks and attempt to infer question IDs from context β€” but mark inferred IDs clearly as "INFERRED: <id>"
455
- Enclose all mathematical expressions in Markdown fenced code blocks (``` triple backticks).
456
- If a diagram/graph is omitted, write [Graph omitted].
457
- Unreadable parts: [illegible].
458
- Unanswered: [No response].
459
- Do NOT recreate diagrams.
460
- Ensure consistency and determinism in formatting so subsequent models can grade directly from this aligned format.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  Expected questions (if missing, write NA):
462
  {ids_block}
463
  -----------------------
464
  OUTPUT FORMAT:
 
 
 
 
465
  Question <id>
466
- AS:
467
- <transcribed answer or placeholder>
468
- ==== GRAPH FOUND ANSWERS ====\nGraph found in:\n- Answer <number> β†’ Page <number>\n(one per line)\n==== END GRAPH FOUND ===="""
 
 
 
 
 
469
  return prompt
470
 
 
 
471
  def extract_graph_questions_from_ms(text: str):
472
  """Extract graph questions and page numbers from MS transcript."""
473
  clean_text = text.replace("\u00A0", " ").replace("\t", " ")
@@ -641,7 +713,7 @@ def imprint_marks_using_mapping(pdf_path, grading_json, output_pdf, expected_ids
641
  page_img = page.convert("RGB")
642
  img_cv = np.array(page_img)
643
  img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGB2BGR)
644
- h, w, _h, w, _ = img_cv.shape
645
  cell_w_px, cell_h_px = w / cols, h / rows
646
 
647
  page_mappings = [m for m in all_mappings if m.get("page") == page_num]
@@ -701,10 +773,9 @@ def extract_pdf_pages_as_images(pdf_path, page_numbers, prefix):
701
  return out_paths
702
 
703
  # ---------------- PIPELINE ----------------
704
- def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
705
  """
706
  Final pipeline with graph-aware grading logic using NEW SDK.
707
- Enhanced with improved PDF saving.
708
  """
709
  try:
710
  print("πŸ” Starting pipeline...")
@@ -722,7 +793,7 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
722
  print("βœ… Upload complete.")
723
 
724
  print("1.i) Transcribing QP+MS (questions first, then full markscheme, with graph detection)...")
725
- qpms_prompt = PROMPTS["QP_MS_TRANSCRIPTION"]["content"] + "\nAt the end, also list all questions in the markscheme where a graph is expected, in the format:\nGraph expected in:\n- Question <number> β†’ Page <number>\n(One per line, after ==== MARKSCHEME END ====)"
726
  qpms_text = gemini_generate_content(qpms_prompt, file_upload_obj=merged_uploaded)
727
  print("πŸ“„ QP+MS transcription received. Saving debug file: debug_qpms_transcript.txt")
728
  with open("debug_qpms_transcript.txt", "w", encoding="utf-8") as f:
@@ -740,7 +811,7 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
740
  extracted_ids = ["NA"]
741
 
742
  print("1.ii) Building AS transcription prompt with expected question IDs and graph detection, sending to Gemini...")
743
- as_prompt = build_as_prompt_with_expected_ids(extracted_ids, qpms_text) + "\nAt the end, also list all answers where a graph is found, in the format:\nGraph found in:\n- Answer <number> β†’ Page <number>\n(One per line, after all answers)"
744
  as_text = gemini_generate_content(as_prompt, file_upload_obj=ans_uploaded)
745
  print("πŸ“ AS transcription received. Saving debug file: debug_as_transcript.txt")
746
  with open("debug_as_transcript.txt", "w", encoding="utf-8") as f:
@@ -765,7 +836,8 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
765
  if ms_graph_images or as_graph_images:
766
  graph_note = "\n\n---\nSome questions require graphs. I've attached the relevant graph pages from QP+MS and from the Answer Sheet. Use them as visual context when grading.\n---\n"
767
  grading_input += graph_note
768
- grading_prompt_system = PROMPTS["GRADING_PROMPT"]["content"]
 
769
  grading_images = ms_graph_images + as_graph_images
770
  grading_text = gemini_generate_content(grading_prompt_system + "\n\nPlease grade the following transcripts:\n" + grading_input, image_obj=grading_images if grading_images else None)
771
  print("🧾 Grading output received. Saving debug file: debug_grading.md")
@@ -773,35 +845,8 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
773
  f.write(grading_text)
774
 
775
  base_name = os.path.splitext(os.path.basename(ans_path))[0]
776
-
777
- # ============ ENHANCED PDF SAVING WITH ERROR HANDLING ============
778
- grading_pdf_path = f"{base_name}_graded.pdf"
779
-
780
- print("πŸ“„ Attempting to save grading report as PDF...")
781
- try:
782
- # Try normal save first
783
- grading_pdf_path = save_as_pdf(grading_text, grading_pdf_path)
784
- print("βœ… Grading PDF saved successfully:", grading_pdf_path)
785
-
786
- except Exception as pdf_error:
787
- print(f"⚠️ Standard PDF save failed: {pdf_error}")
788
- print("πŸ”„ Trying split document method...")
789
-
790
- try:
791
- # Try split method
792
- grading_pdf_path = save_as_pdf_with_split(grading_text, grading_pdf_path)
793
- print("βœ… Grading PDF saved (split method):", grading_pdf_path)
794
-
795
- except Exception as split_error:
796
- print(f"⚠️ Split method also failed: {split_error}")
797
- print("πŸ’Ύ Saving as Markdown fallback...")
798
-
799
- # Fallback to markdown
800
- grading_pdf_path = grading_pdf_path.replace('.pdf', '.md')
801
- with open(grading_pdf_path, 'w', encoding='utf-8') as f:
802
- f.write(grading_text)
803
- print(f"βœ… Saved as Markdown file: {grading_pdf_path}")
804
- print("ℹ️ You can convert this .md file to PDF using online tools or pandoc")
805
 
806
  grading_json = extract_marks_from_grading(grading_text)
807
  with open("debug_grading_json.json", "w", encoding="utf-8") as f:
@@ -812,14 +857,8 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
812
  if imprint:
813
  print("✍ Imprint option enabled. Starting imprinting process...")
814
  imprinted_pdf_path = f"{base_name}_imprinted.pdf"
815
- try:
816
- imprinted_pdf_path = imprint_marks_using_mapping(ans_path, grading_json, imprinted_pdf_path, extracted_ids)
817
- print("βœ… Imprinting finished. Imprinted PDF at:", imprinted_pdf_path)
818
- except Exception as imprint_error:
819
- print(f"❌ Imprinting failed: {imprint_error}")
820
- import traceback
821
- traceback.print_exc()
822
- imprinted_pdf_path = None
823
 
824
  print("🏁 Pipeline finished successfully.")
825
  return qpms_text, as_text, grading_text, grading_pdf_path, imprinted_pdf_path
@@ -831,105 +870,53 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
831
  return f"❌ Error: {e}", None, None, None, None
832
 
833
  # ---------------- GRADIO UI ----------------
834
- with gr.Blocks(title="AI Grading System - Enhanced", theme=gr.themes.Soft()) as demo:
835
- gr.Markdown("# πŸ“˜ AI Grading System - Enhanced Version")
836
- gr.Markdown("**βœ… Using official `google-genai` SDK with improved PDF generation**")
837
- gr.Markdown("---")
838
 
839
  with gr.Row():
840
- with gr.Column():
841
- qp_file = gr.File(label="πŸ“„ Upload Question Paper (PDF)", file_types=[".pdf"])
842
- with gr.Column():
843
- ms_file = gr.File(label="πŸ“„ Upload Markscheme (PDF)", file_types=[".pdf"])
844
- with gr.Column():
845
- ans_file = gr.File(label="πŸ“ Upload Student Answer Sheet (PDF)", file_types=[".pdf"])
846
 
847
  with gr.Row():
848
- imprint_toggle = gr.Checkbox(label="✍ Imprint Marks on Student Answer Sheet", value=False)
849
- run_button = gr.Button("πŸš€ Run Grading Pipeline", variant="primary", size="lg")
850
-
851
- gr.Markdown("---")
852
- gr.Markdown("### πŸ“Š Transcription Outputs")
 
 
853
 
 
 
854
  with gr.Row():
855
- qpms_box = gr.Textbox(label="πŸ“‘ QP+MS Transcript", lines=12, max_lines=20)
856
- as_box = gr.Textbox(label="πŸ“ AS Transcript", lines=12, max_lines=20)
857
 
858
- gr.Markdown("---")
859
- gr.Markdown("### 🎯 Grading Results")
860
-
861
- grading_output_box = gr.Textbox(label="🧾 Grading Report (Markdown)", lines=20, max_lines=30)
862
-
863
- with gr.Row():
864
- grading_pdf_file = gr.File(label="πŸ“₯ Download Grading Report (PDF/MD)")
865
- imprint_pdf_file = gr.File(label="πŸ“₯ Download Imprinted Answer Sheet (Optional)")
866
-
867
- gr.Markdown("---")
868
- gr.Markdown("""
869
- ### πŸ“ Instructions:
870
- 1. Upload all three PDF files (Question Paper, Markscheme, Answer Sheet)
871
- 2. Optionally enable mark imprinting on the answer sheet
872
- 3. Click "Run Grading Pipeline" and wait for processing
873
- 4. Review transcripts and download the grading report
874
-
875
- ### ⚠️ Notes:
876
- - Large documents may take several minutes to process
877
- - If PDF generation fails, a Markdown (.md) file will be provided instead
878
- - Check the console/logs for detailed progress information
879
- - Debug files are saved automatically for troubleshooting
880
- """)
881
-
882
- def run_pipeline(qp_file_obj, ms_file_obj, ans_file_obj, imprint_flag):
883
- """
884
- Wrapper function for Gradio interface
885
- """
886
  if not qp_file_obj or not ms_file_obj or not ans_file_obj:
887
- error_msg = "❌ Please upload all three files (QP, MS, and Answer Sheet)"
888
- return error_msg, "", "", None, None
889
 
890
  qp_path = qp_file_obj.name
891
  ms_path = ms_file_obj.name
892
  ans_path = ans_file_obj.name
893
 
894
- print("\n" + "="*80)
895
- print("🎬 STARTING NEW GRADING SESSION")
896
- print("="*80 + "\n")
897
-
898
  qpms_text, as_text, grading_text, grading_pdf_path, imprinted_pdf_path = align_and_grade_pipeline(
899
- qp_path, ms_path, ans_path, imprint=imprint_flag
900
  )
901
 
902
- print("\n" + "="*80)
903
- print("🎬 GRADING SESSION COMPLETE")
904
- print("="*80 + "\n")
905
-
906
  return qpms_text or "", as_text or "", grading_text or "", grading_pdf_path, imprinted_pdf_path
907
 
908
  run_button.click(
909
  fn=run_pipeline,
910
- inputs=[qp_file, ms_file, ans_file, imprint_toggle],
911
  outputs=[qpms_box, as_box, grading_output_box, grading_pdf_file, imprint_pdf_file]
912
  )
913
 
914
  if __name__ == "__main__":
915
- print("="*80)
916
- print("πŸš€ AI GRADING SYSTEM - STARTING")
917
- print("="*80)
918
- print("πŸ“Œ Make sure GEMINI_API_KEY environment variable is set")
919
- print("πŸ“Œ Required dependencies: google-genai, markdown_pdf, gradio, pdf2image, etc.")
920
- print("="*80 + "\n")
921
-
922
- # Check if API key is set
923
- if not os.getenv("GEMINI_API_KEY"):
924
- print("⚠️ WARNING: GEMINI_API_KEY not found in environment variables!")
925
- print(" Set it with: export GEMINI_API_KEY='your-api-key-here'")
926
- else:
927
- print("βœ… GEMINI_API_KEY found")
928
-
929
- print("\n🌐 Launching Gradio interface...\n")
930
- demo.launch(
931
- server_name="0.0.0.0",
932
- server_port=7860,
933
- share=False,
934
- show_error=True
935
- )
 
6
  import img2pdf
7
  import gradio as gr
8
  from google import genai # NEW SDK
 
9
  from pdf2image import convert_from_path
10
  from PIL import Image, ImageDraw, ImageFont
11
  import cv2
12
  import numpy as np
13
  from PyPDF2 import PdfReader, PdfWriter
14
+ from prompts import QP_MS_TRANSCRIPTION_PROMPT, get_grading_prompt
15
 
16
  # ---------------- CONFIG ----------------
17
  # Create client with new SDK
 
19
  GRID_ROWS, GRID_COLS = 20, 14
20
 
21
  # ---------------- PROMPTS ----------------
22
+ # Prompts are now imported from prompts.py
23
+
24
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  # ---------------- HELPERS ----------------
27
def parse_md_table(md):
    """Parse a Markdown pipe table into a list of data rows.

    The first two non-blank lines are treated as the header row and the
    ``|---|`` separator row and are discarded.

    Args:
        md (str): Markdown text containing a single pipe table.

    Returns:
        list[list[str]]: One list of stripped cell strings per data row.
        Empty cells are kept as ``""`` so that column positions stay
        aligned with the header; rows whose cells are all empty are
        dropped. Returns ``[]`` when there are fewer than three non-blank
        lines (i.e. no data rows).
    """
    lines = [line for line in md.split("\n") if line.strip()]
    if len(lines) < 3:
        return []

    rows = []
    for line in lines[2:]:  # skip header + separator
        # Strip surrounding whitespace first so an indented row still has
        # its outer pipes recognised.
        stripped = line.strip()
        # Remove exactly one leading and one trailing pipe. Using
        # str.strip("|") here would also swallow the pipes that delimit an
        # empty first or last cell.
        if stripped.startswith("|"):
            stripped = stripped[1:]
        if stripped.endswith("|"):
            stripped = stripped[:-1]

        cells = [cell.strip() for cell in stripped.split("|")]
        # Keep empty cells: dropping them shifts later columns to the left
        # and makes callers that index by position read the wrong field.
        if any(cells):
            rows.append(cells)
    return rows
41
+
42
def convert_html_color_spans(md_text):
    """Convert HTML colour spans to LaTeX ``\\textcolor`` commands.

    ``<span style="color:red">M0</span>`` becomes ``\\textcolor{red}{M0}``.
    The style attribute may use single or double quotes, may have
    whitespace around ``=`` and ``:``, and may end with a ``;``. Matching
    is case-insensitive. Spans whose content crosses a line break are
    left untouched, because ``.`` does not match a newline here.

    Args:
        md_text (str): Markdown text that may contain colour spans.

    Returns:
        str: The text with every matched span replaced.
    """
    pattern = (
        r'<span\s+style\s*=\s*(["\'])'   # opening quote, either kind
        r'\s*color\s*:\s*([^"\';]+?)'    # colour value, without any ';'
        r'\s*;?\s*\1\s*>'                # optional ';' then the same quote
        r'\s*(.*?)\s*</span>'            # span content, trimmed
    )

    def repl(match):
        color = match.group(2).strip()
        content = match.group(3)
        # Built by concatenation; a callable replacement is inserted
        # verbatim by re.sub, so the backslash needs no extra escaping.
        return '\\textcolor{' + color + '}{' + content + '}'

    return re.sub(pattern, repl, md_text, flags=re.IGNORECASE)
50
+
51
def cleanup_markdown_for_latex(md_text):
    """Clean up Markdown text so it converts to LaTeX more reliably.

    Two things are done:

    1. A blank line is inserted between a bold
       ``**Markscheme vs Student Answer**`` heading and a table that
       follows it directly.
    2. Common Unicode math symbols and Greek letters are replaced with the
       matching LaTeX command wrapped in inline math (``$...$``).

    Args:
        md_text (str): Markdown text to clean.

    Returns:
        str: The cleaned text.
    """
    # Ensure spacing between bold headers and tables
    md_text = re.sub(
        r'(\*\*Markscheme vs Student Answer\*\*)\s*(\|)',
        r'\1\n\n\2',
        md_text,
    )

    # Convert common unicode math symbols to LaTeX (safety net).
    # Keys are written as \u escapes so the table cannot be corrupted by a
    # wrong file encoding; the symbol is named in the trailing comment.
    replacements = {
        '\u222b': r'\int ',        # integral
        '\u00b2': '^2',            # superscript two
        '\u00b3': '^3',            # superscript three
        '\u00bd': r'\frac{1}{2}',  # one half
        '\u00bc': r'\frac{1}{4}',  # one quarter
        '\u221e': r'\infty',       # infinity
        '\u2264': r'\leq',         # less-than or equal
        '\u2265': r'\geq',         # greater-than or equal
        '\u2260': r'\neq',         # not equal
        '\u00b1': r'\pm',          # plus-minus
        '\u00d7': r'\times',       # multiplication sign
        '\u00f7': r'\div',         # division sign
        '\u221a': r'\sqrt',        # square root
        '\u2211': r'\sum',         # n-ary summation
        '\u220f': r'\prod',        # n-ary product
        '\u2202': r'\partial',     # partial differential
        '\u03c0': r'\pi',          # pi
        '\u03b8': r'\theta',       # theta
        '\u03b1': r'\alpha',       # alpha
        '\u03b2': r'\beta',        # beta
        '\u03b3': r'\gamma',       # gamma
        '\u03b4': r'\delta',       # delta
        '\u03b5': r'\epsilon',     # epsilon
        '\u03bb': r'\lambda',      # lambda
        '\u03bc': r'\mu',          # mu
        '\u03c3': r'\sigma',       # sigma
        '\u0394': r'\Delta',       # capital delta
        '\u03a3': r'\Sigma',       # capital sigma
        '\u03a9': r'\Omega',       # capital omega
    }

    # NOTE(review): every hit is wrapped in its own $...$, including symbols
    # that already sit inside a math span - confirm upstream text keeps
    # these symbols outside existing math mode.
    for char, latex in replacements.items():
        md_text = md_text.replace(char, f'${latex}$')

    return md_text
93
+
94
def escape_latex_special_chars(text):
    """Escape characters that have a special meaning in LaTeX.

    Text that already contains ``$`` or a backslash is assumed to be
    LaTeX (math mode or a command) and is returned unchanged.

    Args:
        text (str): Plain text to escape.

    Returns:
        str: The text with ``% & # _ { } ~ ^`` replaced by their LaTeX
        escapes, or the input unchanged when it looks like LaTeX already.
    """
    # Don't escape if already in math mode or LaTeX command
    if '$' in text or '\\' in text:
        return text

    # One translate pass maps each input character exactly once, so the
    # braces inside "\textasciitilde{}" can never be escaped a second time,
    # whatever order the entries are listed in.
    escape_table = str.maketrans({
        '%': r'\%',
        '&': r'\&',
        '#': r'\#',
        '_': r'\_',
        '{': r'\{',
        '}': r'\}',
        '~': r'\textasciitilde{}',
        '^': r'\textasciicircum{}',
    })
    return text.translate(escape_table)
115
+
116
+ def save_as_pdf(text, filename="output.pdf"):
117
  """
118
+ Convert Markdown text to PDF using Pandoc with pdflatex.
119
+ Extracts the Examiner's Summary Report and places it at the top with enhanced formatting.
120
+ Converts HTML color spans to LaTeX textcolor commands.
121
+
122
+ Args:
123
+ text (str): Markdown content to convert
124
+ filename (str): Output PDF filename
125
+
126
+ Returns:
127
+ str: Path to the generated PDF file
128
+
129
+ Raises:
130
+ Exception: If Pandoc or pdflatex is not available, or conversion fails
131
  """
132
+ base_name = os.path.splitext(filename)[0]
133
+ temp_md_file = f"{base_name}_input.md"
134
+ temp_tex_file = f"{base_name}_temp.tex"
135
+
136
  try:
137
+ print(f"πŸ“ Processing markdown for PDF generation...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
+ # Step 1: Extract Summary Report Table
140
+ summary_pattern = re.compile(
141
+ r"### Examiner's Summary Report\s*\n\n(\|.*?\|)\s*\n\n\*\*Total:\s*(.*?)\*\*",
142
+ re.DOTALL
143
+ )
144
+ summary_match = summary_pattern.search(text)
145
+
146
+ if summary_match:
147
+ summary_table_md = summary_match.group(1)
148
+ summary_total = summary_match.group(2)
149
+ # Remove summary section from markdown
150
+ text = summary_pattern.sub("", text)
151
+ print("βœ… Extracted Examiner's Summary Report")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  else:
153
+ summary_table_md = ""
154
+ summary_total = ""
155
+ print("⚠️ No Examiner's Summary Report found")
156
+
157
+ # Step 2: Clean up markdown and convert HTML color spans to LaTeX
158
+ text = cleanup_markdown_for_latex(text)
159
+ text = convert_html_color_spans(text)
160
+ print("βœ… Cleaned markdown and converted HTML color spans to LaTeX")
161
+
162
+ # Save cleaned markdown
163
+ with open(temp_md_file, 'w', encoding='utf-8') as f:
164
+ f.write(text)
165
+
166
+ # Step 3: Convert MD to LaTeX via Pandoc
167
+ print(f"πŸ“ Converting markdown to LaTeX using Pandoc...")
168
+ pandoc_cmd = [
169
+ "pandoc",
170
+ "--from=markdown",
171
+ "--to=latex",
172
+ "--standalone",
173
+ temp_md_file,
174
+ "-o", temp_tex_file
175
+ ]
176
+
177
+ result = subprocess.run(pandoc_cmd, capture_output=True, check=False)
178
+ if result.returncode != 0 or not os.path.exists(temp_tex_file):
179
+ try:
180
+ stderr = result.stderr.decode('utf-8', errors='replace')
181
+ except:
182
+ stderr = str(result.stderr)
183
+ raise Exception(f"Pandoc conversion failed: {stderr}")
184
+ print("βœ… Pandoc conversion complete")
185
+
186
+ # Step 4: Modify the generated LaTeX
187
+ with open(temp_tex_file, "r", encoding="utf-8") as f:
188
+ tex = f.read()
189
+
190
+ # Change document class to larger font
191
+ tex = tex.replace(
192
+ r"\documentclass{article}",
193
+ r"\documentclass[12pt]{extarticle}"
194
+ )
195
 
196
+ # Inject enhanced packages with better table formatting
197
+ insert_packages = r"""\usepackage[a4paper, margin=1in]{geometry}
198
+ \usepackage{xcolor}
199
+ \usepackage{colortbl}
200
+ \usepackage{booktabs}
201
+ \usepackage{array}
202
+ \usepackage{longtable}
203
+ \renewcommand{\arraystretch}{1.4}
204
+ \newcolumntype{L}[1]{>{\raggedright\arraybackslash}p{#1}}"""
205
+
206
+ tex = tex.replace(r"\begin{document}", insert_packages + "\n\\begin{document}")
207
+
208
+ # Step 5: Build enhanced LaTeX table for summary with zebra striping (if exists)
209
+ if summary_table_md:
210
+ summary_rows = parse_md_table(summary_table_md)
211
+ summary_latex = r"""\section*{Examiner's Summary Report}
212
+ \begin{center}
213
+ \rowcolors{2}{gray!10}{white}
214
+ \begin{tabular}{|c|c|c|L{8cm}|}
215
+ \hline
216
+ \rowcolor{gray!30}
217
+ \textbf{Question} & \textbf{Marks} & \textbf{Remark} & \textbf{Feedback} \\ \hline
218
+ """
219
+ for row in summary_rows:
220
+ if len(row) >= 4:
221
+ # Escape special LaTeX characters in feedback
222
+ feedback = row[3]
223
+ # Only escape if not already LaTeX code
224
+ if not ('$' in feedback or '\\textcolor' in feedback):
225
+ feedback = feedback.replace('%', r'\%').replace('&', r'\&').replace('#', r'\#')
226
+
227
+ summary_latex += f"{row[0]} & {row[1]} & {row[2]} & {feedback} \\\\ \\hline\n"
228
 
229
+ summary_latex += r"\end{tabular}"
230
+ summary_latex += "\n\\end{center}\n\n"
231
+ summary_latex += f"\\vspace{{0.5cm}}\\noindent\\textbf{{\\Large Overall Score: {summary_total}}}\n\n"
232
+ summary_latex += "\\hrulefill\n\\vspace{1cm}\n\n"
233
+ summary_latex += "\\newpage\n\n"
234
 
235
+ # Insert summary right after \begin{document}
236
+ tex = tex.replace(
237
+ r"\begin{document}",
238
+ r"\begin{document}" + "\n\n" + summary_latex
239
+ )
240
+ print("βœ… Injected enhanced summary table with zebra striping at top of document")
241
+
242
+ # Save modified LaTeX
243
+ with open(temp_tex_file, "w", encoding="utf-8") as f:
244
+ f.write(tex)
245
+
246
+ # Step 6: Compile PDF with pdflatex
247
+ print(f"πŸ“ Compiling PDF with pdflatex...")
248
+ pdflatex_cmd = [
249
+ "pdflatex",
250
+ "-interaction=nonstopmode",
251
+ f"-output-directory={os.path.dirname(os.path.abspath(temp_tex_file)) or '.'}",
252
+ temp_tex_file
253
+ ]
254
+
255
+ # Run twice to resolve references
256
+ # Don't use text=True to avoid encoding issues with pdflatex output
257
+ result1 = subprocess.run(pdflatex_cmd, capture_output=True, check=False)
258
+ result2 = subprocess.run(pdflatex_cmd, capture_output=True, check=False)
259
+
260
+ # Check if PDF was actually created (better than checking return code)
261
+ temp_pdf = temp_tex_file.replace(".tex", ".pdf")
262
+ if not os.path.exists(temp_pdf):
263
+ # Try to decode error output for debugging
264
  try:
265
+ stderr = result2.stderr.decode('utf-8', errors='replace')
266
+ except:
267
+ stderr = str(result2.stderr)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
 
269
+ # Also check log file for more details
270
+ log_file = temp_tex_file.replace(".tex", ".log")
271
+ if os.path.exists(log_file):
272
+ try:
273
+ with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
274
+ log_content = f.read()
275
+ # Extract error lines
276
+ error_lines = [line for line in log_content.split('\n') if '!' in line]
277
+ if error_lines:
278
+ stderr += "\n\nLaTeX Errors:\n" + "\n".join(error_lines[:10])
279
+ except:
280
+ pass
281
 
282
+ raise Exception(f"pdflatex failed to create PDF. Check LaTeX syntax. Error: {stderr[:1000]}")
283
+
284
+ # Move output PDF to final filename
285
+ if os.path.exists(temp_pdf):
286
+ if os.path.exists(filename):
287
+ os.remove(filename)
288
+ os.rename(temp_pdf, filename)
289
+
290
+ print(f"βœ… PDF generated successfully: {filename}")
291
+
292
+ # Clean up temporary files
293
+ for ext in [".md", ".tex", ".aux", ".log", ".out"]:
294
+ temp_file = base_name + ext
295
+ if os.path.exists(temp_file):
296
+ os.remove(temp_file)
297
+ # Also clean input/temp variants
298
+ for prefix in ["_input", "_temp"]:
299
+ temp_file = base_name + prefix + ext
300
+ if os.path.exists(temp_file):
301
+ os.remove(temp_file)
302
+
303
+ return filename
304
+
305
+ except subprocess.CalledProcessError as e:
306
+ print(f"❌ Conversion failed: {e}")
307
+ print(f" STDOUT: {e.stdout}")
308
+ print(f" STDERR: {e.stderr}")
309
+
310
+ raise Exception(f"PDF conversion failed: {e.stderr}")
311
+
312
+ except FileNotFoundError as e:
313
+ print(f"❌ Required tool not found: {e}")
314
+
315
+ raise Exception(
316
+ "Pandoc or pdflatex not found. Please install:\n"
317
+ " - pandoc\n"
318
+ " - texlive (or MiKTeX on Windows)\n"
319
+ " - texlive-latex-extra (for extarticle class)"
320
+ )
321
+
322
+ except Exception as e:
323
+ print(f"❌ Unexpected error during PDF conversion: {e}")
324
+ import traceback
325
+ traceback.print_exc()
326
+ raise
327
 
328
  def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
329
  if output_path is None:
 
465
  print("⚠️ No question IDs extracted; will send NA placeholder.")
466
  return fallback_matches
467
 
468
def build_as_cot_prompt_with_expected_ids(expected_ids, qpms_text=None):
    """
    Build the Chain-of-Thought prompt for transcribing a handwritten answer sheet.

    The prompt asks the model to reason inside a <think> tag before each
    answer, to wrap mathematical expressions in LaTeX dollar delimiters
    ($...$), to apply explicit rules for NA-like and blank answers, and to
    finish with a "GRAPH FOUND ANSWERS" section.

    Args:
        expected_ids: Iterable of expected question IDs. Each entry is
            converted with ``str()`` and listed one per line. When empty or
            ``None`` the placeholder block ``{NA}`` is used instead.
        qpms_text: Optional transcript of the Question Paper + Markscheme.
            When it contains non-whitespace text it is embedded (stripped)
            between BEGIN/END markers as reference material; otherwise no
            reference section is emitted.

    Returns:
        The complete prompt as a single string.
    """
    # Coerce every ID to str so numeric IDs (e.g. 2) do not break the join.
    if expected_ids:
        ids_block = "{\n" + "\n".join(str(qid) for qid in expected_ids) + "\n}"
    else:
        ids_block = "{NA}"

    # Only announce a QP+MS transcript when there is actual text to show;
    # an empty section would tell the model a reference exists when it does not.
    reference = qpms_text.strip() if qpms_text else ""
    qpms_section = ""
    if reference:
        qpms_section = (
            "\nYou are also provided with the full transcript of the Question Paper and Markscheme (QP+MS) below."
            "\nUse it primarily to resolve ambiguous handwriting and to confirm expected answers when needed."
            "\n--- BEGIN QP+MS TRANSCRIPT ---\n"
            f"{reference}\n"
            "--- END QP+MS TRANSCRIPT ---\n"
        )

    # NOTE: the prompt body is an f-string; it must not contain literal braces
    # other than the two interpolated fields below.
    prompt = f"""You are a high-quality handwritten transcription assistant, performing transcription with a Chain-of-Thought process.
INPUT: This PDF contains a student's handwritten answer sheet.
{qpms_section}
TASK:
1. **THINKING:** Before transcribing each answer, document your thought process inside a **<think>** tag.
   - Identify the question ID. If inferred, note why.
   - Detail any ambiguities (unclear numbers, symbols, or structures).
   - Explain how ambiguities were resolved, including whether the QP+MS transcript was consulted.
   - If QP+MS was consulted but you chose not to change the transcription, state this.
   - If the initial question label was incorrect (e.g., 2.a vs 2.b), correct it and briefly explain the reasoning in <think>.
   *Example Thinking:*
   <think>
   - Found Question 3(a).
   - The term could be '$2x$' or '21x'.
   - Markscheme uses '$21x$', but handwriting matches '$2x$'.
   - Decision: transcribe '$2x$'.
   </think>

2. **TRANSCRIPTION:** Transcribe the student's answers directly and faithfully.
   - Assign each answer to a labelled question ID when present.
   - For unlabeled answers, segment logically and mark inferred IDs as "**INFERRED: <id>**".
   - **Mathematical expressions and standalone variables must appear inside LaTeX dollar delimiters ($...$).**
   - If a diagram/graph is omitted, write **[Graph omitted]**.
   - If handwriting is unreadable: **[illegible]**.

**ANSWER-INTERPRETATION RULES:**
- If the student writes “NA”, “N/A”, “Not Applicable”, or clear equivalents → record exactly as **NA**.
- If the student leaves the space blank, crosses it out, makes no meaningful attempt, or provides no answer → record **[No response]**.

Ensure deterministic formatting so subsequent models can grade directly from this aligned format.

Expected questions (if missing, write NA):
{ids_block}
-----------------------
OUTPUT FORMAT:
<think>...</think>
Question <id>
AS:<transcribed answer or placeholder>
<think>...</think>
Question <id>
AS:<transcribed answer or placeholder>
...
==== GRAPH FOUND ANSWERS ====
Graph found in:
- Answer <number> → Page <number>
(one per line)
==== END GRAPH FOUND ===="""

    return prompt
540
 
541
+
542
+
543
  def extract_graph_questions_from_ms(text: str):
544
  """Extract graph questions and page numbers from MS transcript."""
545
  clean_text = text.replace("\u00A0", " ").replace("\t", " ")
 
713
  page_img = page.convert("RGB")
714
  img_cv = np.array(page_img)
715
  img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGB2BGR)
716
+ h, w, _ = img_cv.shape
717
  cell_w_px, cell_h_px = w / cols, h / rows
718
 
719
  page_mappings = [m for m in all_mappings if m.get("page") == page_num]
 
773
  return out_paths
774
 
775
  # ---------------- PIPELINE ----------------
776
+ def align_and_grade_pipeline(qp_path, ms_path, ans_path, subject="Maths", imprint=False):
777
  """
778
  Final pipeline with graph-aware grading logic using NEW SDK.
 
779
  """
780
  try:
781
  print("πŸ” Starting pipeline...")
 
793
  print("βœ… Upload complete.")
794
 
795
  print("1.i) Transcribing QP+MS (questions first, then full markscheme, with graph detection)...")
796
+ qpms_prompt = QP_MS_TRANSCRIPTION_PROMPT["content"] + "\nAt the end, also list all questions in the markscheme where a graph is expected, in the format:\nGraph expected in:\n- Question <number> β†’ Page <number>\n(One per line, after ==== MARKSCHEME END ====)"
797
  qpms_text = gemini_generate_content(qpms_prompt, file_upload_obj=merged_uploaded)
798
  print("πŸ“„ QP+MS transcription received. Saving debug file: debug_qpms_transcript.txt")
799
  with open("debug_qpms_transcript.txt", "w", encoding="utf-8") as f:
 
811
  extracted_ids = ["NA"]
812
 
813
  print("1.ii) Building AS transcription prompt with expected question IDs and graph detection, sending to Gemini...")
814
+ as_prompt = build_as_cot_prompt_with_expected_ids(extracted_ids, qpms_text) + "\nAt the end, also list all answers where a graph is found, in the format:\nGraph found in:\n- Answer <number> β†’ Page <number>\n(One per line, after all answers)"
815
  as_text = gemini_generate_content(as_prompt, file_upload_obj=ans_uploaded)
816
  print("πŸ“ AS transcription received. Saving debug file: debug_as_transcript.txt")
817
  with open("debug_as_transcript.txt", "w", encoding="utf-8") as f:
 
836
  if ms_graph_images or as_graph_images:
837
  graph_note = "\n\n---\nSome questions require graphs. I've attached the relevant graph pages from QP+MS and from the Answer Sheet. Use them as visual context when grading.\n---\n"
838
  grading_input += graph_note
839
+ grading_prompt_obj = get_grading_prompt(subject.lower())
840
+ grading_prompt_system = grading_prompt_obj["content"]
841
  grading_images = ms_graph_images + as_graph_images
842
  grading_text = gemini_generate_content(grading_prompt_system + "\n\nPlease grade the following transcripts:\n" + grading_input, image_obj=grading_images if grading_images else None)
843
  print("🧾 Grading output received. Saving debug file: debug_grading.md")
 
845
  f.write(grading_text)
846
 
847
  base_name = os.path.splitext(os.path.basename(ans_path))[0]
848
+ grading_pdf_path = save_as_pdf(grading_text, f"{base_name}_graded.pdf")
849
+ print("πŸ“„ Grading PDF saved:", grading_pdf_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
850
 
851
  grading_json = extract_marks_from_grading(grading_text)
852
  with open("debug_grading_json.json", "w", encoding="utf-8") as f:
 
857
  if imprint:
858
  print("✍ Imprint option enabled. Starting imprinting process...")
859
  imprinted_pdf_path = f"{base_name}_imprinted.pdf"
860
+ imprinted_pdf_path = imprint_marks_using_mapping(ans_path, grading_json, imprinted_pdf_path, extracted_ids)
861
+ print("βœ… Imprinting finished. Imprinted PDF at:", imprinted_pdf_path)
 
 
 
 
 
 
862
 
863
  print("🏁 Pipeline finished successfully.")
864
  return qpms_text, as_text, grading_text, grading_pdf_path, imprinted_pdf_path
 
870
  return f"❌ Error: {e}", None, None, None, None
871
 
872
  # ---------------- GRADIO UI ----------------
873
# Gradio front end: three PDF uploads, a subject selector and an optional
# imprint toggle feed `align_and_grade_pipeline`; its five results are shown
# in two transcript boxes, a grading box and two downloadable files.
with gr.Blocks(title="AI Grading (Pandoc + pdflatex)") as demo:
    gr.Markdown("## 📘 AI Grading — Using Pandoc + pdflatex for PDF Generation")
    gr.Markdown("**✅ Now using Pandoc with pdflatex for professional-quality PDF outputs!**")

    with gr.Row():
        qp_file = gr.File(label="📄 Upload Question Paper (PDF)")
        ms_file = gr.File(label="📄 Upload Markscheme (PDF)")
        ans_file = gr.File(label="📝 Upload Student Answer Sheet (PDF)")

    with gr.Row():
        subject_dropdown = gr.Dropdown(
            choices=["Maths", "Science"],
            value="Maths",
            label="📚 Subject",
            info="Select the subject to apply appropriate grading guidelines",
        )
        imprint_toggle = gr.Checkbox(label="✍ Imprint Marks on Student Answer Sheet", value=False)

    run_button = gr.Button("🚀 Run Pipeline")

    with gr.Row():
        qpms_box = gr.Textbox(label="📑 QP+MS Transcript", lines=12)
        as_box = gr.Textbox(label="📝 AS Transcript", lines=12)

    grading_output_box = gr.Textbox(label="🧾 Grading (Markdown)", lines=20)
    grading_pdf_file = gr.File(label="📥 Download Grading PDF")
    imprint_pdf_file = gr.File(label="📥 Download Imprinted PDF (Optional)")

    def run_pipeline(qp_file_obj, ms_file_obj, ans_file_obj, subject_choice, imprint_flag):
        """
        Click handler: validate the uploads, run the grading pipeline and
        shape its results for the five output components.

        Returns a 5-tuple
        (qpms_text, as_text, grading_text, grading_pdf_path, imprinted_pdf_path);
        the three text entries are never None so the Textbox outputs always
        receive a string.
        """
        if not (qp_file_obj and ms_file_obj and ans_file_obj):
            return "❌ Please upload all three files", "", "", None, None

        def _upload_path(upload):
            # Uploads may arrive as objects exposing `.name` or (presumably,
            # depending on the Gradio version/config — verify) as plain path
            # strings; accept both instead of failing with AttributeError.
            return getattr(upload, "name", upload)

        results = align_and_grade_pipeline(
            _upload_path(qp_file_obj),
            _upload_path(ms_file_obj),
            _upload_path(ans_file_obj),
            # A cleared dropdown yields no value; fall back to the pipeline's
            # own default subject rather than passing None.
            subject=subject_choice or "Maths",
            imprint=bool(imprint_flag),
        )
        qpms_text, as_text, grading_text, grading_pdf_path, imprinted_pdf_path = results

        return qpms_text or "", as_text or "", grading_text or "", grading_pdf_path, imprinted_pdf_path

    run_button.click(
        fn=run_pipeline,
        inputs=[qp_file, ms_file, ans_file, subject_dropdown, imprint_toggle],
        outputs=[qpms_box, as_box, grading_output_box, grading_pdf_file, imprint_pdf_file],
    )

if __name__ == "__main__":
    demo.launch()