atz21 committed on
Commit
983d970
·
verified ·
1 Parent(s): cf4d77e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -9
app.py CHANGED
@@ -1049,17 +1049,62 @@ def imprint_marks_using_mapping(pdf_path, grading_json, output_pdf, expected_ids
1049
  def extract_pdf_pages_as_images(pdf_path, page_numbers, prefix):
1050
  """
1051
  Extracts unique pages (1-based) from a PDF as images, saves as PNG, returns list of file paths.
 
1052
  """
 
 
 
 
1053
  unique_pages = sorted(set(page_numbers))
1054
- images = convert_from_path(pdf_path, dpi=200, first_page=min(unique_pages), last_page=max(unique_pages))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1055
  out_paths = []
1056
  for idx, page_num in enumerate(unique_pages):
1057
  img_idx = page_num - min(unique_pages)
1058
- img = images[img_idx]
1059
- out_path = f"{prefix}_page_{page_num}.png"
1060
- img.save(out_path, "PNG")
1061
- print(f"📤 Extracted graph page {page_num} from {pdf_path} as {out_path}")
1062
- out_paths.append(out_path)
 
 
 
 
 
 
 
 
 
 
 
1063
  return out_paths
1064
 
1065
  # ---------------- PIPELINE ----------------
@@ -1189,9 +1234,9 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, subject="Maths", imprin
1189
  return f"❌ Error: {e}", None, None, None, None, {}
1190
 
1191
  # ---------------- GRADIO UI ----------------
1192
- with gr.Blocks(title="AI Grading") as demo:
1193
- gr.Markdown("## 📘 AI Grading ")
1194
-
1195
 
1196
  if supabase_client:
1197
  gr.Markdown("**☁️ Supabase Storage: Enabled** - All files will be uploaded to cloud storage")
 
1049
  def extract_pdf_pages_as_images(pdf_path, page_numbers, prefix):
1050
  """
1051
  Extracts unique pages (1-based) from a PDF as images, saves as PNG, returns list of file paths.
1052
+ Handles cases where requested pages don't exist in the PDF.
1053
  """
1054
+ if not page_numbers:
1055
+ print(f"⚠️ No page numbers provided for extraction")
1056
+ return []
1057
+
1058
  unique_pages = sorted(set(page_numbers))
1059
+
1060
+ # First, get the total page count to validate requested pages
1061
+ try:
1062
+ from PyPDF2 import PdfReader
1063
+ reader = PdfReader(pdf_path)
1064
+ total_pages = len(reader.pages)
1065
+ print(f"📄 PDF has {total_pages} total pages")
1066
+
1067
+ # Filter out invalid page numbers
1068
+ valid_pages = [p for p in unique_pages if 1 <= p <= total_pages]
1069
+ invalid_pages = [p for p in unique_pages if p not in valid_pages]
1070
+
1071
+ if invalid_pages:
1072
+ print(f"⚠️ Skipping invalid page numbers (out of range): {invalid_pages}")
1073
+
1074
+ if not valid_pages:
1075
+ print(f"❌ No valid pages to extract from {pdf_path}")
1076
+ return []
1077
+
1078
+ unique_pages = valid_pages
1079
+ except Exception as e:
1080
+ print(f"⚠️ Could not validate page numbers: {e}. Proceeding with extraction...")
1081
+
1082
+ # Extract the pages
1083
+ try:
1084
+ images = convert_from_path(pdf_path, dpi=200, first_page=min(unique_pages), last_page=max(unique_pages))
1085
+ except Exception as e:
1086
+ print(f"❌ Failed to convert PDF pages to images: {e}")
1087
+ return []
1088
+
1089
  out_paths = []
1090
  for idx, page_num in enumerate(unique_pages):
1091
  img_idx = page_num - min(unique_pages)
1092
+
1093
+ # Bounds check to prevent index errors
1094
+ if img_idx >= len(images):
1095
+ print(f"⚠️ Page {page_num} not found in extracted images (index {img_idx} >= {len(images)}). Skipping...")
1096
+ continue
1097
+
1098
+ try:
1099
+ img = images[img_idx]
1100
+ out_path = f"{prefix}_page_{page_num}.png"
1101
+ img.save(out_path, "PNG")
1102
+ print(f"📤 Extracted graph page {page_num} from {pdf_path} as {out_path}")
1103
+ out_paths.append(out_path)
1104
+ except Exception as e:
1105
+ print(f"❌ Failed to save page {page_num}: {e}")
1106
+ continue
1107
+
1108
  return out_paths
1109
 
1110
  # ---------------- PIPELINE ----------------
 
1234
  return f"❌ Error: {e}", None, None, None, None, {}
1235
 
1236
  # ---------------- GRADIO UI ----------------
1237
+ with gr.Blocks(title="AI Grading (Pandoc + pdflatex)") as demo:
1238
+ gr.Markdown("## 📘 AI Grading — Using Pandoc + pdflatex for PDF Generation")
1239
+ gr.Markdown("**✅ Now using Pandoc with pdflatex for professional-quality PDF outputs!**")
1240
 
1241
  if supabase_client:
1242
  gr.Markdown("**☁️ Supabase Storage: Enabled** - All files will be uploaded to cloud storage")