Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1049,17 +1049,62 @@ def imprint_marks_using_mapping(pdf_path, grading_json, output_pdf, expected_ids
|
|
| 1049 |
def extract_pdf_pages_as_images(pdf_path, page_numbers, prefix):
|
| 1050 |
"""
|
| 1051 |
Extracts unique pages (1-based) from a PDF as images, saves as PNG, returns list of file paths.
|
|
|
|
| 1052 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1053 |
unique_pages = sorted(set(page_numbers))
|
| 1054 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1055 |
out_paths = []
|
| 1056 |
for idx, page_num in enumerate(unique_pages):
|
| 1057 |
img_idx = page_num - min(unique_pages)
|
| 1058 |
-
|
| 1059 |
-
|
| 1060 |
-
|
| 1061 |
-
|
| 1062 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1063 |
return out_paths
|
| 1064 |
|
| 1065 |
# ---------------- PIPELINE ----------------
|
|
@@ -1189,9 +1234,9 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, subject="Maths", imprin
|
|
| 1189 |
return f"❌ Error: {e}", None, None, None, None, {}
|
| 1190 |
|
| 1191 |
# ---------------- GRADIO UI ----------------
|
| 1192 |
-
with gr.Blocks(title="AI Grading") as demo:
|
| 1193 |
-
gr.Markdown("## 📘 AI Grading ")
|
| 1194 |
-
|
| 1195 |
|
| 1196 |
if supabase_client:
|
| 1197 |
gr.Markdown("**☁️ Supabase Storage: Enabled** - All files will be uploaded to cloud storage")
|
|
|
|
| 1049 |
def extract_pdf_pages_as_images(pdf_path, page_numbers, prefix):
    """
    Extract unique pages (1-based) from a PDF as PNG images.

    Duplicate page numbers are collapsed and pages are processed in ascending
    order. Page numbers below 1 are always dropped; page numbers above the
    PDF's page count are dropped whenever the page count can be determined.
    Every failure is reported with ``print`` and results in a shorter (possibly
    empty) list instead of an exception.

    Args:
        pdf_path: Path of the PDF to read.
        page_numbers: Iterable of 1-based page numbers; may be empty or None.
        prefix: Output path prefix; each page is written to
            ``f"{prefix}_page_{page_num}.png"``.

    Returns:
        List of the PNG file paths that were written, in ascending page order.
    """
    if not page_numbers:
        print("⚠️ No page numbers provided for extraction")
        return []

    unique_pages = sorted(set(page_numbers))

    # Best-effort page count lookup. Only the calls that can actually fail
    # (missing PyPDF2, unreadable or corrupt PDF) live inside the try block.
    total_pages = None
    try:
        from PyPDF2 import PdfReader

        total_pages = len(PdfReader(pdf_path).pages)
        print(f"📄 PDF has {total_pages} total pages")
    except Exception as e:
        print(f"⚠️ Could not validate page numbers: {e}. Proceeding with extraction...")

    # Partition in a single pass. The lower bound is enforced even when the
    # page count is unknown: a 1-based page below 1 can never exist, and
    # keeping it would shift the page -> image index offset computed below.
    valid_pages = []
    invalid_pages = []
    for page_num in unique_pages:
        in_range = page_num >= 1 and (total_pages is None or page_num <= total_pages)
        (valid_pages if in_range else invalid_pages).append(page_num)

    if invalid_pages:
        print(f"⚠️ Skipping invalid page numbers (out of range): {invalid_pages}")

    if not valid_pages:
        print(f"❌ No valid pages to extract from {pdf_path}")
        return []

    # valid_pages is sorted, so its ends are the requested range bounds.
    first_page = valid_pages[0]
    last_page = valid_pages[-1]

    # NOTE(review): the whole first..last range is rendered in one call, so
    # widely spaced page numbers also render every page in between.
    # convert_from_path is presumably pdf2image's - confirm against imports.
    try:
        images = convert_from_path(
            pdf_path, dpi=200, first_page=first_page, last_page=last_page
        )
    except Exception as e:
        print(f"❌ Failed to convert PDF pages to images: {e}")
        return []

    out_paths = []
    for page_num in valid_pages:
        # images[0] corresponds to first_page, so offset by it.
        img_idx = page_num - first_page

        # Bounds check: the converter may return fewer images than requested.
        if img_idx >= len(images):
            print(f"⚠️ Page {page_num} not found in extracted images (index {img_idx} >= {len(images)}). Skipping...")
            continue

        try:
            out_path = f"{prefix}_page_{page_num}.png"
            images[img_idx].save(out_path, "PNG")
            print(f"📤 Extracted graph page {page_num} from {pdf_path} as {out_path}")
            out_paths.append(out_path)
        except Exception as e:
            # One page failing to save must not abort the remaining pages.
            print(f"❌ Failed to save page {page_num}: {e}")
            continue

    return out_paths
|
| 1109 |
|
| 1110 |
# ---------------- PIPELINE ----------------
|
|
|
|
| 1234 |
return f"❌ Error: {e}", None, None, None, None, {}
|
| 1235 |
|
| 1236 |
# ---------------- GRADIO UI ----------------
|
| 1237 |
+
with gr.Blocks(title="AI Grading (Pandoc + pdflatex)") as demo:
|
| 1238 |
+
gr.Markdown("## 📘 AI Grading — Using Pandoc + pdflatex for PDF Generation")
|
| 1239 |
+
gr.Markdown("**✅ Now using Pandoc with pdflatex for professional-quality PDF outputs!**")
|
| 1240 |
|
| 1241 |
if supabase_client:
|
| 1242 |
gr.Markdown("**☁️ Supabase Storage: Enabled** - All files will be uploaded to cloud storage")
|