Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -133,10 +133,13 @@ def save_as_pdf(text, filename="output.pdf"):
|
|
| 133 |
temp_md_file = f"{base_name}_input.md"
|
| 134 |
temp_tex_file = f"{base_name}_temp.tex"
|
| 135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
try:
|
| 137 |
-
print(f"π Processing markdown for PDF generation...")
|
| 138 |
-
|
| 139 |
# Step 1: Extract Summary Report Table
|
|
|
|
| 140 |
summary_pattern = re.compile(
|
| 141 |
r"### Examiner's Summary Report\s*\n\n(\|.*?\|)\s*\n\n\*\*Total:\s*(.*?)\*\*",
|
| 142 |
re.DOTALL
|
|
@@ -146,25 +149,26 @@ def save_as_pdf(text, filename="output.pdf"):
|
|
| 146 |
if summary_match:
|
| 147 |
summary_table_md = summary_match.group(1)
|
| 148 |
summary_total = summary_match.group(2)
|
| 149 |
-
# Remove summary section from markdown
|
| 150 |
text = summary_pattern.sub("", text)
|
| 151 |
-
print("β
Extracted
|
| 152 |
else:
|
| 153 |
summary_table_md = ""
|
| 154 |
summary_total = ""
|
| 155 |
-
print("β οΈ No Examiner's Summary Report found")
|
| 156 |
|
| 157 |
-
# Step 2: Clean up markdown
|
|
|
|
| 158 |
text = cleanup_markdown_for_latex(text)
|
| 159 |
text = convert_html_color_spans(text)
|
| 160 |
-
print("β
|
| 161 |
|
| 162 |
# Save cleaned markdown
|
| 163 |
with open(temp_md_file, 'w', encoding='utf-8') as f:
|
| 164 |
f.write(text)
|
|
|
|
| 165 |
|
| 166 |
# Step 3: Convert MD to LaTeX via Pandoc
|
| 167 |
-
print(
|
| 168 |
pandoc_cmd = [
|
| 169 |
"pandoc",
|
| 170 |
"--from=markdown",
|
|
@@ -173,27 +177,35 @@ def save_as_pdf(text, filename="output.pdf"):
|
|
| 173 |
temp_md_file,
|
| 174 |
"-o", temp_tex_file
|
| 175 |
]
|
|
|
|
| 176 |
|
| 177 |
result = subprocess.run(pandoc_cmd, capture_output=True, check=False)
|
| 178 |
-
|
|
|
|
| 179 |
try:
|
| 180 |
stderr = result.stderr.decode('utf-8', errors='replace')
|
| 181 |
except:
|
| 182 |
stderr = str(result.stderr)
|
|
|
|
|
|
|
| 183 |
raise Exception(f"Pandoc conversion failed: {stderr}")
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
# Step 4: Modify the generated LaTeX
|
|
|
|
| 187 |
with open(temp_tex_file, "r", encoding="utf-8") as f:
|
| 188 |
tex = f.read()
|
| 189 |
|
| 190 |
-
# Change document class to larger font
|
| 191 |
tex = tex.replace(
|
| 192 |
r"\documentclass{article}",
|
| 193 |
r"\documentclass[12pt]{extarticle}"
|
| 194 |
)
|
| 195 |
|
| 196 |
-
# Inject enhanced packages with better table formatting
|
| 197 |
insert_packages = r"""\usepackage[a4paper, margin=1in]{geometry}
|
| 198 |
\usepackage{xcolor}
|
| 199 |
\usepackage{colortbl}
|
|
@@ -204,10 +216,14 @@ def save_as_pdf(text, filename="output.pdf"):
|
|
| 204 |
\newcolumntype{L}[1]{>{\raggedright\arraybackslash}p{#1}}"""
|
| 205 |
|
| 206 |
tex = tex.replace(r"\begin{document}", insert_packages + "\n\\begin{document}")
|
|
|
|
| 207 |
|
| 208 |
-
# Step 5: Build enhanced LaTeX table for summary
|
| 209 |
if summary_table_md:
|
|
|
|
| 210 |
summary_rows = parse_md_table(summary_table_md)
|
|
|
|
|
|
|
| 211 |
summary_latex = r"""\section*{Examiner's Summary Report}
|
| 212 |
\begin{center}
|
| 213 |
\rowcolors{2}{gray!10}{white}
|
|
@@ -218,9 +234,7 @@ def save_as_pdf(text, filename="output.pdf"):
|
|
| 218 |
"""
|
| 219 |
for row in summary_rows:
|
| 220 |
if len(row) >= 4:
|
| 221 |
-
# Escape special LaTeX characters in feedback
|
| 222 |
feedback = row[3]
|
| 223 |
-
# Only escape if not already LaTeX code
|
| 224 |
if not ('$' in feedback or '\\textcolor' in feedback):
|
| 225 |
feedback = feedback.replace('%', r'\%').replace('&', r'\&').replace('#', r'\#')
|
| 226 |
|
|
@@ -232,19 +246,19 @@ def save_as_pdf(text, filename="output.pdf"):
|
|
| 232 |
summary_latex += "\\hrulefill\n\\vspace{1cm}\n\n"
|
| 233 |
summary_latex += "\\newpage\n\n"
|
| 234 |
|
| 235 |
-
# Insert summary right after \begin{document}
|
| 236 |
tex = tex.replace(
|
| 237 |
r"\begin{document}",
|
| 238 |
r"\begin{document}" + "\n\n" + summary_latex
|
| 239 |
)
|
| 240 |
-
print("β
|
|
|
|
|
|
|
| 241 |
|
| 242 |
-
# Save modified LaTeX
|
| 243 |
with open(temp_tex_file, "w", encoding="utf-8") as f:
|
| 244 |
f.write(tex)
|
| 245 |
|
| 246 |
# Step 6: Compile PDF with pdflatex
|
| 247 |
-
print(
|
| 248 |
pdflatex_cmd = [
|
| 249 |
"pdflatex",
|
| 250 |
"-interaction=nonstopmode",
|
|
@@ -252,66 +266,85 @@ def save_as_pdf(text, filename="output.pdf"):
|
|
| 252 |
temp_tex_file
|
| 253 |
]
|
| 254 |
|
| 255 |
-
|
| 256 |
-
# Don't use text=True to avoid encoding issues with pdflatex output
|
| 257 |
result1 = subprocess.run(pdflatex_cmd, capture_output=True, check=False)
|
|
|
|
|
|
|
| 258 |
result2 = subprocess.run(pdflatex_cmd, capture_output=True, check=False)
|
| 259 |
|
| 260 |
-
# Check if PDF was actually created (better than checking return code)
|
| 261 |
temp_pdf = temp_tex_file.replace(".tex", ".pdf")
|
|
|
|
| 262 |
if not os.path.exists(temp_pdf):
|
| 263 |
-
|
|
|
|
| 264 |
try:
|
| 265 |
stderr = result2.stderr.decode('utf-8', errors='replace')
|
| 266 |
except:
|
| 267 |
stderr = str(result2.stderr)
|
| 268 |
|
| 269 |
-
# Also check log file for more details
|
| 270 |
log_file = temp_tex_file.replace(".tex", ".log")
|
| 271 |
if os.path.exists(log_file):
|
|
|
|
| 272 |
try:
|
| 273 |
with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
|
| 274 |
log_content = f.read()
|
| 275 |
-
# Extract error lines
|
| 276 |
error_lines = [line for line in log_content.split('\n') if '!' in line]
|
| 277 |
if error_lines:
|
|
|
|
|
|
|
|
|
|
| 278 |
stderr += "\n\nLaTeX Errors:\n" + "\n".join(error_lines[:10])
|
| 279 |
-
except:
|
| 280 |
-
|
| 281 |
|
| 282 |
-
raise Exception(f"pdflatex failed to create PDF.
|
| 283 |
|
| 284 |
-
|
| 285 |
-
if os.path.exists(temp_pdf):
|
| 286 |
-
if os.path.exists(filename):
|
| 287 |
-
os.remove(filename)
|
| 288 |
-
os.rename(temp_pdf, filename)
|
| 289 |
|
| 290 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
|
| 292 |
# Clean up temporary files
|
|
|
|
|
|
|
| 293 |
for ext in [".md", ".tex", ".aux", ".log", ".out"]:
|
| 294 |
temp_file = base_name + ext
|
| 295 |
if os.path.exists(temp_file):
|
| 296 |
os.remove(temp_file)
|
| 297 |
-
|
| 298 |
for prefix in ["_input", "_temp"]:
|
| 299 |
temp_file = base_name + prefix + ext
|
| 300 |
if os.path.exists(temp_file):
|
| 301 |
os.remove(temp_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
|
| 303 |
return filename
|
| 304 |
|
| 305 |
except subprocess.CalledProcessError as e:
|
| 306 |
-
print(f"
|
| 307 |
print(f" STDOUT: {e.stdout}")
|
| 308 |
print(f" STDERR: {e.stderr}")
|
| 309 |
-
|
| 310 |
raise Exception(f"PDF conversion failed: {e.stderr}")
|
| 311 |
|
| 312 |
except FileNotFoundError as e:
|
| 313 |
-
print(f"β
|
| 314 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
raise Exception(
|
| 316 |
"Pandoc or pdflatex not found. Please install:\n"
|
| 317 |
" - pandoc\n"
|
|
@@ -320,9 +353,10 @@ def save_as_pdf(text, filename="output.pdf"):
|
|
| 320 |
)
|
| 321 |
|
| 322 |
except Exception as e:
|
| 323 |
-
print(f"β
|
| 324 |
import traceback
|
| 325 |
traceback.print_exc()
|
|
|
|
| 326 |
raise
|
| 327 |
|
| 328 |
def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
|
|
@@ -608,7 +642,108 @@ def extract_marks_from_grading(grading_text):
|
|
| 608 |
})
|
| 609 |
print("β
Extracted grading marks for", len(grading_json["grading"]), "question blocks.")
|
| 610 |
print(json.dumps(grading_json, indent=2))
|
| 611 |
-
return grading_json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 612 |
|
| 613 |
# ---------------- MAPPING/IMPRINT HELPERS ----------------
|
| 614 |
def ask_gemini_for_mapping_batch(image_paths, grading_json, expected_ids=None, rows=GRID_ROWS, cols=GRID_COLS):
|
|
@@ -891,6 +1026,14 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, subject="Maths", imprin
|
|
| 891 |
with open("debug_grading.md", "w", encoding="utf-8") as f:
|
| 892 |
f.write(grading_text)
|
| 893 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 894 |
base_name = os.path.splitext(os.path.basename(ans_path))[0]
|
| 895 |
grading_pdf_path = save_as_pdf(grading_text, f"{base_name}_graded.pdf")
|
| 896 |
print("π Grading PDF saved:", grading_pdf_path)
|
|
|
|
| 133 |
temp_md_file = f"{base_name}_input.md"
|
| 134 |
temp_tex_file = f"{base_name}_temp.tex"
|
| 135 |
|
| 136 |
+
print("\n" + "="*60)
|
| 137 |
+
print("οΏ½ MARKDOWoN TO PDF CONVERSION PROCESS")
|
| 138 |
+
print("="*60)
|
| 139 |
+
|
| 140 |
try:
|
|
|
|
|
|
|
| 141 |
# Step 1: Extract Summary Report Table
|
| 142 |
+
print("\n[STEP 1/6] Extracting Examiner's Summary Report...")
|
| 143 |
summary_pattern = re.compile(
|
| 144 |
r"### Examiner's Summary Report\s*\n\n(\|.*?\|)\s*\n\n\*\*Total:\s*(.*?)\*\*",
|
| 145 |
re.DOTALL
|
|
|
|
| 149 |
if summary_match:
|
| 150 |
summary_table_md = summary_match.group(1)
|
| 151 |
summary_total = summary_match.group(2)
|
|
|
|
| 152 |
text = summary_pattern.sub("", text)
|
| 153 |
+
print(f" β
SUCCESS: Extracted summary report with total: {summary_total}")
|
| 154 |
else:
|
| 155 |
summary_table_md = ""
|
| 156 |
summary_total = ""
|
| 157 |
+
print(" β οΈ WARNING: No Examiner's Summary Report found in markdown")
|
| 158 |
|
| 159 |
+
# Step 2: Clean up markdown
|
| 160 |
+
print("\n[STEP 2/6] Cleaning markdown and converting HTML to LaTeX...")
|
| 161 |
text = cleanup_markdown_for_latex(text)
|
| 162 |
text = convert_html_color_spans(text)
|
| 163 |
+
print(" β
SUCCESS: Markdown cleaned and HTML color spans converted")
|
| 164 |
|
| 165 |
# Save cleaned markdown
|
| 166 |
with open(temp_md_file, 'w', encoding='utf-8') as f:
|
| 167 |
f.write(text)
|
| 168 |
+
print(f" π Saved cleaned markdown to: {temp_md_file}")
|
| 169 |
|
| 170 |
# Step 3: Convert MD to LaTeX via Pandoc
|
| 171 |
+
print("\n[STEP 3/6] Converting markdown to LaTeX using Pandoc...")
|
| 172 |
pandoc_cmd = [
|
| 173 |
"pandoc",
|
| 174 |
"--from=markdown",
|
|
|
|
| 177 |
temp_md_file,
|
| 178 |
"-o", temp_tex_file
|
| 179 |
]
|
| 180 |
+
print(f" π§ Running: {' '.join(pandoc_cmd)}")
|
| 181 |
|
| 182 |
result = subprocess.run(pandoc_cmd, capture_output=True, check=False)
|
| 183 |
+
|
| 184 |
+
if result.returncode != 0:
|
| 185 |
try:
|
| 186 |
stderr = result.stderr.decode('utf-8', errors='replace')
|
| 187 |
except:
|
| 188 |
stderr = str(result.stderr)
|
| 189 |
+
print(f" β FAILED: Pandoc returned error code {result.returncode}")
|
| 190 |
+
print(f" Error details: {stderr[:500]}")
|
| 191 |
raise Exception(f"Pandoc conversion failed: {stderr}")
|
| 192 |
+
|
| 193 |
+
if not os.path.exists(temp_tex_file):
|
| 194 |
+
print(f" β FAILED: LaTeX file not created at {temp_tex_file}")
|
| 195 |
+
raise Exception("Pandoc did not create the expected LaTeX file")
|
| 196 |
+
|
| 197 |
+
print(f" β
SUCCESS: LaTeX file created at {temp_tex_file}")
|
| 198 |
|
| 199 |
# Step 4: Modify the generated LaTeX
|
| 200 |
+
print("\n[STEP 4/6] Enhancing LaTeX document...")
|
| 201 |
with open(temp_tex_file, "r", encoding="utf-8") as f:
|
| 202 |
tex = f.read()
|
| 203 |
|
|
|
|
| 204 |
tex = tex.replace(
|
| 205 |
r"\documentclass{article}",
|
| 206 |
r"\documentclass[12pt]{extarticle}"
|
| 207 |
)
|
| 208 |
|
|
|
|
| 209 |
insert_packages = r"""\usepackage[a4paper, margin=1in]{geometry}
|
| 210 |
\usepackage{xcolor}
|
| 211 |
\usepackage{colortbl}
|
|
|
|
| 216 |
\newcolumntype{L}[1]{>{\raggedright\arraybackslash}p{#1}}"""
|
| 217 |
|
| 218 |
tex = tex.replace(r"\begin{document}", insert_packages + "\n\\begin{document}")
|
| 219 |
+
print(" β
SUCCESS: Enhanced document class and added packages")
|
| 220 |
|
| 221 |
+
# Step 5: Build enhanced LaTeX table for summary
|
| 222 |
if summary_table_md:
|
| 223 |
+
print("\n[STEP 5/6] Building enhanced summary table...")
|
| 224 |
summary_rows = parse_md_table(summary_table_md)
|
| 225 |
+
print(f" π Parsed {len(summary_rows)} rows from summary table")
|
| 226 |
+
|
| 227 |
summary_latex = r"""\section*{Examiner's Summary Report}
|
| 228 |
\begin{center}
|
| 229 |
\rowcolors{2}{gray!10}{white}
|
|
|
|
| 234 |
"""
|
| 235 |
for row in summary_rows:
|
| 236 |
if len(row) >= 4:
|
|
|
|
| 237 |
feedback = row[3]
|
|
|
|
| 238 |
if not ('$' in feedback or '\\textcolor' in feedback):
|
| 239 |
feedback = feedback.replace('%', r'\%').replace('&', r'\&').replace('#', r'\#')
|
| 240 |
|
|
|
|
| 246 |
summary_latex += "\\hrulefill\n\\vspace{1cm}\n\n"
|
| 247 |
summary_latex += "\\newpage\n\n"
|
| 248 |
|
|
|
|
| 249 |
tex = tex.replace(
|
| 250 |
r"\begin{document}",
|
| 251 |
r"\begin{document}" + "\n\n" + summary_latex
|
| 252 |
)
|
| 253 |
+
print(" β
SUCCESS: Summary table with zebra striping injected at document top")
|
| 254 |
+
else:
|
| 255 |
+
print("\n[STEP 5/6] Skipping summary table (not found)")
|
| 256 |
|
|
|
|
| 257 |
with open(temp_tex_file, "w", encoding="utf-8") as f:
|
| 258 |
f.write(tex)
|
| 259 |
|
| 260 |
# Step 6: Compile PDF with pdflatex
|
| 261 |
+
print("\n[STEP 6/6] Compiling PDF with pdflatex...")
|
| 262 |
pdflatex_cmd = [
|
| 263 |
"pdflatex",
|
| 264 |
"-interaction=nonstopmode",
|
|
|
|
| 266 |
temp_tex_file
|
| 267 |
]
|
| 268 |
|
| 269 |
+
print(" π§ Running pdflatex (pass 1/2)...")
|
|
|
|
| 270 |
result1 = subprocess.run(pdflatex_cmd, capture_output=True, check=False)
|
| 271 |
+
|
| 272 |
+
print(" π§ Running pdflatex (pass 2/2)...")
|
| 273 |
result2 = subprocess.run(pdflatex_cmd, capture_output=True, check=False)
|
| 274 |
|
|
|
|
| 275 |
temp_pdf = temp_tex_file.replace(".tex", ".pdf")
|
| 276 |
+
|
| 277 |
if not os.path.exists(temp_pdf):
|
| 278 |
+
print(f" β FAILED: PDF not created at {temp_pdf}")
|
| 279 |
+
|
| 280 |
try:
|
| 281 |
stderr = result2.stderr.decode('utf-8', errors='replace')
|
| 282 |
except:
|
| 283 |
stderr = str(result2.stderr)
|
| 284 |
|
|
|
|
| 285 |
log_file = temp_tex_file.replace(".tex", ".log")
|
| 286 |
if os.path.exists(log_file):
|
| 287 |
+
print(f" π Checking LaTeX log file: {log_file}")
|
| 288 |
try:
|
| 289 |
with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
|
| 290 |
log_content = f.read()
|
|
|
|
| 291 |
error_lines = [line for line in log_content.split('\n') if '!' in line]
|
| 292 |
if error_lines:
|
| 293 |
+
print(f" β LaTeX Errors found ({len(error_lines)} lines):")
|
| 294 |
+
for err_line in error_lines[:10]:
|
| 295 |
+
print(f" {err_line}")
|
| 296 |
stderr += "\n\nLaTeX Errors:\n" + "\n".join(error_lines[:10])
|
| 297 |
+
except Exception as log_err:
|
| 298 |
+
print(f" β οΈ Could not read log file: {log_err}")
|
| 299 |
|
| 300 |
+
raise Exception(f"pdflatex failed to create PDF. Error: {stderr[:1000]}")
|
| 301 |
|
| 302 |
+
print(f" β
SUCCESS: PDF compiled at {temp_pdf}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
|
| 304 |
+
# Move output PDF to final filename
|
| 305 |
+
if os.path.exists(filename):
|
| 306 |
+
os.remove(filename)
|
| 307 |
+
os.rename(temp_pdf, filename)
|
| 308 |
+
print(f" π¦ Moved to final location: {filename}")
|
| 309 |
|
| 310 |
# Clean up temporary files
|
| 311 |
+
print("\n[CLEANUP] Removing temporary files...")
|
| 312 |
+
cleaned_count = 0
|
| 313 |
for ext in [".md", ".tex", ".aux", ".log", ".out"]:
|
| 314 |
temp_file = base_name + ext
|
| 315 |
if os.path.exists(temp_file):
|
| 316 |
os.remove(temp_file)
|
| 317 |
+
cleaned_count += 1
|
| 318 |
for prefix in ["_input", "_temp"]:
|
| 319 |
temp_file = base_name + prefix + ext
|
| 320 |
if os.path.exists(temp_file):
|
| 321 |
os.remove(temp_file)
|
| 322 |
+
cleaned_count += 1
|
| 323 |
+
print(f" π§Ή Cleaned up {cleaned_count} temporary files")
|
| 324 |
+
|
| 325 |
+
print("\n" + "="*60)
|
| 326 |
+
print("β
PDF CONVERSION COMPLETED SUCCESSFULLY")
|
| 327 |
+
print(f"π Output file: {filename}")
|
| 328 |
+
print("="*60 + "\n")
|
| 329 |
|
| 330 |
return filename
|
| 331 |
|
| 332 |
except subprocess.CalledProcessError as e:
|
| 333 |
+
print(f"\nβ SUBPROCESS ERROR: {e}")
|
| 334 |
print(f" STDOUT: {e.stdout}")
|
| 335 |
print(f" STDERR: {e.stderr}")
|
| 336 |
+
print("="*60 + "\n")
|
| 337 |
raise Exception(f"PDF conversion failed: {e.stderr}")
|
| 338 |
|
| 339 |
except FileNotFoundError as e:
|
| 340 |
+
print(f"\nβ FILE NOT FOUND ERROR: {e}")
|
| 341 |
+
print("="*60)
|
| 342 |
+
print("β οΈ REQUIRED TOOLS MISSING")
|
| 343 |
+
print("Please install the following:")
|
| 344 |
+
print(" β’ pandoc")
|
| 345 |
+
print(" β’ texlive (or MiKTeX on Windows)")
|
| 346 |
+
print(" β’ texlive-latex-extra (for extarticle class)")
|
| 347 |
+
print("="*60 + "\n")
|
| 348 |
raise Exception(
|
| 349 |
"Pandoc or pdflatex not found. Please install:\n"
|
| 350 |
" - pandoc\n"
|
|
|
|
| 353 |
)
|
| 354 |
|
| 355 |
except Exception as e:
|
| 356 |
+
print(f"\nβ UNEXPECTED ERROR: {e}")
|
| 357 |
import traceback
|
| 358 |
traceback.print_exc()
|
| 359 |
+
print("="*60 + "\n")
|
| 360 |
raise
|
| 361 |
|
| 362 |
def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
|
|
|
|
| 642 |
})
|
| 643 |
print("β
Extracted grading marks for", len(grading_json["grading"]), "question blocks.")
|
| 644 |
print(json.dumps(grading_json, indent=2))
|
| 645 |
+
return grading_json
|
| 646 |
+
|
| 647 |
+
def check_and_correct_total_marks(grading_text):
    """
    Verify the overall total in the Examiner's Summary Report against the
    sum of the per-question totals, rewriting the summary total on mismatch.

    Per-question totals are read from blocks that start with a
    ``### Question <number>...`` heading and contain a ``**Total: a/b**``
    line. The overall total is the first ``**Total: a/b**`` found after the
    ``### Examiner's Summary Report`` heading (and before any following
    ``### Question`` heading).

    The text is returned unchanged (with ``was_corrected`` False) when the
    summary heading or its total cannot be found, or when no per-question
    total could be extracted (so a valid summary is never replaced by 0/0).

    Args:
        grading_text (str): The full grading markdown text

    Returns:
        tuple: (corrected_text, calculated_awarded, calculated_possible, was_corrected)
    """
    summary_heading = "### Examiner's Summary Report"
    question_heading = "### Question "

    print("\n" + "="*60)
    print("VERIFYING TOTAL MARKS IN SUMMARY REPORT")
    print("="*60)

    # A question block runs from its heading to its own "**Total: a/b**".
    # The negative lookahead stops the block body from running into the next
    # question or into the summary report, so a question that has no total
    # line cannot pick up a total that belongs to a different section.
    question_block_pattern = re.compile(
        r"### Question (\d+[^\n]*)"
        r"(?:(?!### Question |" + re.escape(summary_heading) + r")[\s\S])*?"
        r"\*\*Total:\s*(\d+)/(\d+)\*\*"
    )

    # Kept as a list (not a dict keyed by id) so that repeated ids are all
    # reported instead of silently overwriting each other.
    question_marks = [
        (match.group(1).strip(), int(match.group(2)), int(match.group(3)))
        for match in question_block_pattern.finditer(grading_text)
    ]
    calculated_total_awarded = sum(awarded for _, awarded, _ in question_marks)
    calculated_total_possible = sum(possible for _, _, possible in question_marks)

    print(f"\nExtracted marks from {len(question_marks)} questions:")
    for question_id, awarded, possible in question_marks:
        print(f"   Question {question_id}: {awarded}/{possible}")

    print("\nCalculated totals from individual questions:")
    print(f"   Awarded: {calculated_total_awarded}")
    print(f"   Possible: {calculated_total_possible}")

    unchanged = (grading_text, calculated_total_awarded, calculated_total_possible, False)

    # Find the summary report section
    summary_report_start = grading_text.find(summary_heading)
    if summary_report_start == -1:
        print("⚠️ Warning: Could not find '### Examiner's Summary Report' section.")
        return unchanged

    # Only look for the overall total inside the summary section itself: stop
    # at the next question heading if the summary is not the last section.
    next_question = grading_text.find(question_heading, summary_report_start)
    summary_report_end = len(grading_text) if next_question == -1 else next_question

    summary_total_pattern = re.compile(r"\*\*Total:\s*(\d+)/(\d+)\*\*")
    summary_match = summary_total_pattern.search(
        grading_text, summary_report_start, summary_report_end
    )
    if summary_match is None:
        print("⚠️ Warning: Could not find overall total in summary report.")
        return unchanged

    original_summary_awarded = int(summary_match.group(1))
    original_summary_possible = int(summary_match.group(2))
    print(f"\nOriginal summary report total: {original_summary_awarded}/{original_summary_possible}")

    # Without any per-question totals there is nothing to verify against;
    # "correcting" here would overwrite the reported total with 0/0.
    if not question_marks:
        print("⚠️ Warning: No per-question totals found - leaving summary total unchanged.")
        print("="*60 + "\n")
        return unchanged

    # Check for discrepancies
    total_mismatch = False

    if calculated_total_awarded != original_summary_awarded:
        print("\n❌ DISCREPANCY FOUND in awarded marks!")
        print(f"   Calculated: {calculated_total_awarded}")
        print(f"   Reported: {original_summary_awarded}")
        total_mismatch = True

    if calculated_total_possible != original_summary_possible:
        print("\n❌ DISCREPANCY FOUND in possible marks!")
        print(f"   Calculated: {calculated_total_possible}")
        print(f"   Reported: {original_summary_possible}")
        total_mismatch = True

    corrected_report_text = grading_text
    if total_mismatch:
        print("\n🔧 CORRECTING summary total:")
        print(f"   FROM: {original_summary_awarded}/{original_summary_possible}")
        print(f"   TO: {calculated_total_awarded}/{calculated_total_possible}")

        # Splice the new "a/b" over the matched digits only, leaving the
        # surrounding "**Total: " / "**" markup exactly as it was written.
        corrected_report_text = (
            grading_text[:summary_match.start(1)]
            + f"{calculated_total_awarded}/{calculated_total_possible}"
            + grading_text[summary_match.end(2):]
        )
        print("✅ Total marks corrected successfully!")
    else:
        print("\n✅ Total marks are CORRECT - no correction needed!")

    print("="*60 + "\n")

    return corrected_report_text, calculated_total_awarded, calculated_total_possible, total_mismatch
|
| 747 |
|
| 748 |
# ---------------- MAPPING/IMPRINT HELPERS ----------------
|
| 749 |
def ask_gemini_for_mapping_batch(image_paths, grading_json, expected_ids=None, rows=GRID_ROWS, cols=GRID_COLS):
|
|
|
|
| 1026 |
with open("debug_grading.md", "w", encoding="utf-8") as f:
|
| 1027 |
f.write(grading_text)
|
| 1028 |
|
| 1029 |
+
# Verify and correct total marks if needed
|
| 1030 |
+
grading_text, calc_awarded, calc_possible, was_corrected = check_and_correct_total_marks(grading_text)
|
| 1031 |
+
|
| 1032 |
+
if was_corrected:
|
| 1033 |
+
print("π Saving corrected grading to debug file: debug_grading_corrected.md")
|
| 1034 |
+
with open("debug_grading_corrected.md", "w", encoding="utf-8") as f:
|
| 1035 |
+
f.write(grading_text)
|
| 1036 |
+
|
| 1037 |
base_name = os.path.splitext(os.path.basename(ans_path))[0]
|
| 1038 |
grading_pdf_path = save_as_pdf(grading_text, f"{base_name}_graded.pdf")
|
| 1039 |
print("π Grading PDF saved:", grading_pdf_path)
|