TRIAL

Sleeping

App Files Files Community

atz21 committed on Dec 9, 2025

Commit

c5c9486

verified ·

1 Parent(s): cf83915

Update app.py

Browse files

Files changed (1) hide show

app.py +184 -41

app.py CHANGED Viewed

@@ -133,10 +133,13 @@ def save_as_pdf(text, filename="output.pdf"):
     temp_md_file = f"{base_name}_input.md"
     temp_tex_file = f"{base_name}_temp.tex"
     try:
-        print(f"📝 Processing markdown for PDF generation...")
         # Step 1: Extract Summary Report Table
         summary_pattern = re.compile(
             r"### Examiner's Summary Report\s*\n\n(\|.*?\|)\s*\n\n\*\*Total:\s*(.*?)\*\*",
             re.DOTALL
@@ -146,25 +149,26 @@ def save_as_pdf(text, filename="output.pdf"):
         if summary_match:
             summary_table_md = summary_match.group(1)
             summary_total = summary_match.group(2)
-            # Remove summary section from markdown
             text = summary_pattern.sub("", text)
-            print("✅ Extracted Examiner's Summary Report")
         else:
             summary_table_md = ""
             summary_total = ""
-            print("⚠️ No Examiner's Summary Report found")
-        # Step 2: Clean up markdown and convert HTML color spans to LaTeX
         text = cleanup_markdown_for_latex(text)
         text = convert_html_color_spans(text)
-        print("✅ Cleaned markdown and converted HTML color spans to LaTeX")
         # Save cleaned markdown
         with open(temp_md_file, 'w', encoding='utf-8') as f:
             f.write(text)
         # Step 3: Convert MD to LaTeX via Pandoc
-        print(f"📝 Converting markdown to LaTeX using Pandoc...")
         pandoc_cmd = [
             "pandoc",
             "--from=markdown",
@@ -173,27 +177,35 @@ def save_as_pdf(text, filename="output.pdf"):
             temp_md_file,
             "-o", temp_tex_file
         ]
         result = subprocess.run(pandoc_cmd, capture_output=True, check=False)
-        if result.returncode != 0 or not os.path.exists(temp_tex_file):
             try:
                 stderr = result.stderr.decode('utf-8', errors='replace')
             except:
                 stderr = str(result.stderr)
             raise Exception(f"Pandoc conversion failed: {stderr}")
-        print("✅ Pandoc conversion complete")
         # Step 4: Modify the generated LaTeX
         with open(temp_tex_file, "r", encoding="utf-8") as f:
             tex = f.read()
-        # Change document class to larger font
         tex = tex.replace(
             r"\documentclass{article}",
             r"\documentclass[12pt]{extarticle}"
         )
-        # Inject enhanced packages with better table formatting
         insert_packages = r"""\usepackage[a4paper, margin=1in]{geometry}
 \usepackage{xcolor}
 \usepackage{colortbl}
@@ -204,10 +216,14 @@ def save_as_pdf(text, filename="output.pdf"):
 \newcolumntype{L}[1]{>{\raggedright\arraybackslash}p{#1}}"""
         tex = tex.replace(r"\begin{document}", insert_packages + "\n\\begin{document}")
-        # Step 5: Build enhanced LaTeX table for summary with zebra striping (if exists)
         if summary_table_md:
             summary_rows = parse_md_table(summary_table_md)
             summary_latex = r"""\section*{Examiner's Summary Report}
 \begin{center}
 \rowcolors{2}{gray!10}{white}
@@ -218,9 +234,7 @@ def save_as_pdf(text, filename="output.pdf"):
 """
             for row in summary_rows:
                 if len(row) >= 4:
-                    # Escape special LaTeX characters in feedback
                     feedback = row[3]
-                    # Only escape if not already LaTeX code
                     if not ('$' in feedback or '\\textcolor' in feedback):
                         feedback = feedback.replace('%', r'\%').replace('&', r'\&').replace('#', r'\#')
@@ -232,19 +246,19 @@ def save_as_pdf(text, filename="output.pdf"):
             summary_latex += "\\hrulefill\n\\vspace{1cm}\n\n"
             summary_latex += "\\newpage\n\n"
-            # Insert summary right after \begin{document}
             tex = tex.replace(
                 r"\begin{document}",
                 r"\begin{document}" + "\n\n" + summary_latex
             )
-            print("✅ Injected enhanced summary table with zebra striping at top of document")
-        # Save modified LaTeX
         with open(temp_tex_file, "w", encoding="utf-8") as f:
             f.write(tex)
         # Step 6: Compile PDF with pdflatex
-        print(f"📝 Compiling PDF with pdflatex...")
         pdflatex_cmd = [
             "pdflatex",
             "-interaction=nonstopmode",
@@ -252,66 +266,85 @@ def save_as_pdf(text, filename="output.pdf"):
             temp_tex_file
         ]
-        # Run twice to resolve references
-        # Don't use text=True to avoid encoding issues with pdflatex output
         result1 = subprocess.run(pdflatex_cmd, capture_output=True, check=False)
         result2 = subprocess.run(pdflatex_cmd, capture_output=True, check=False)
-        # Check if PDF was actually created (better than checking return code)
         temp_pdf = temp_tex_file.replace(".tex", ".pdf")
         if not os.path.exists(temp_pdf):
-            # Try to decode error output for debugging
             try:
                 stderr = result2.stderr.decode('utf-8', errors='replace')
             except:
                 stderr = str(result2.stderr)
-            # Also check log file for more details
             log_file = temp_tex_file.replace(".tex", ".log")
             if os.path.exists(log_file):
                 try:
                     with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
                         log_content = f.read()
-                        # Extract error lines
                         error_lines = [line for line in log_content.split('\n') if '!' in line]
                         if error_lines:
                             stderr += "\n\nLaTeX Errors:\n" + "\n".join(error_lines[:10])
-                except:
-                    pass
-            raise Exception(f"pdflatex failed to create PDF. Check LaTeX syntax. Error: {stderr[:1000]}")
-        # Move output PDF to final filename
-        if os.path.exists(temp_pdf):
-            if os.path.exists(filename):
-                os.remove(filename)
-            os.rename(temp_pdf, filename)
-        print(f"✅ PDF generated successfully: {filename}")
         # Clean up temporary files
         for ext in [".md", ".tex", ".aux", ".log", ".out"]:
             temp_file = base_name + ext
             if os.path.exists(temp_file):
                 os.remove(temp_file)
-            # Also clean input/temp variants
             for prefix in ["_input", "_temp"]:
                 temp_file = base_name + prefix + ext
                 if os.path.exists(temp_file):
                     os.remove(temp_file)
         return filename
     except subprocess.CalledProcessError as e:
-        print(f"�� Conversion failed: {e}")
         print(f"   STDOUT: {e.stdout}")
         print(f"   STDERR: {e.stderr}")
         raise Exception(f"PDF conversion failed: {e.stderr}")
     except FileNotFoundError as e:
-        print(f"❌ Required tool not found: {e}")
         raise Exception(
             "Pandoc or pdflatex not found. Please install:\n"
             "  - pandoc\n"
@@ -320,9 +353,10 @@ def save_as_pdf(text, filename="output.pdf"):
         )
     except Exception as e:
-        print(f"❌ Unexpected error during PDF conversion: {e}")
         import traceback
         traceback.print_exc()
         raise
 def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
@@ -608,7 +642,108 @@ def extract_marks_from_grading(grading_text):
         })
     print("✅ Extracted grading marks for", len(grading_json["grading"]), "question blocks.")
     print(json.dumps(grading_json, indent=2))
-    return grading_json
 # ---------------- MAPPING/IMPRINT HELPERS ----------------
 def ask_gemini_for_mapping_batch(image_paths, grading_json, expected_ids=None, rows=GRID_ROWS, cols=GRID_COLS):
@@ -891,6 +1026,14 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, subject="Maths", imprin
         with open("debug_grading.md", "w", encoding="utf-8") as f:
             f.write(grading_text)
         base_name = os.path.splitext(os.path.basename(ans_path))[0]
         grading_pdf_path = save_as_pdf(grading_text, f"{base_name}_graded.pdf")
         print("📄 Grading PDF saved:", grading_pdf_path)

     temp_md_file = f"{base_name}_input.md"
     temp_tex_file = f"{base_name}_temp.tex"
+    print("\n" + "="*60)
+    print("� MARKDOWoN TO PDF CONVERSION PROCESS")
+    print("="*60)
     try:
         # Step 1: Extract Summary Report Table
+        print("\n[STEP 1/6] Extracting Examiner's Summary Report...")
         summary_pattern = re.compile(
             r"### Examiner's Summary Report\s*\n\n(\|.*?\|)\s*\n\n\*\*Total:\s*(.*?)\*\*",
             re.DOTALL
         if summary_match:
             summary_table_md = summary_match.group(1)
             summary_total = summary_match.group(2)
             text = summary_pattern.sub("", text)
+            print(f"   ✅ SUCCESS: Extracted summary report with total: {summary_total}")
         else:
             summary_table_md = ""
             summary_total = ""
+            print("   ⚠️ WARNING: No Examiner's Summary Report found in markdown")
+        # Step 2: Clean up markdown
+        print("\n[STEP 2/6] Cleaning markdown and converting HTML to LaTeX...")
         text = cleanup_markdown_for_latex(text)
         text = convert_html_color_spans(text)
+        print("   ✅ SUCCESS: Markdown cleaned and HTML color spans converted")
         # Save cleaned markdown
         with open(temp_md_file, 'w', encoding='utf-8') as f:
             f.write(text)
+        print(f"   📝 Saved cleaned markdown to: {temp_md_file}")
         # Step 3: Convert MD to LaTeX via Pandoc
+        print("\n[STEP 3/6] Converting markdown to LaTeX using Pandoc...")
         pandoc_cmd = [
             "pandoc",
             "--from=markdown",
             temp_md_file,
             "-o", temp_tex_file
         ]
+        print(f"   🔧 Running: {' '.join(pandoc_cmd)}")
         result = subprocess.run(pandoc_cmd, capture_output=True, check=False)
+        if result.returncode != 0:
             try:
                 stderr = result.stderr.decode('utf-8', errors='replace')
             except:
                 stderr = str(result.stderr)
+            print(f"   ❌ FAILED: Pandoc returned error code {result.returncode}")
+            print(f"   Error details: {stderr[:500]}")
             raise Exception(f"Pandoc conversion failed: {stderr}")
+        if not os.path.exists(temp_tex_file):
+            print(f"   ❌ FAILED: LaTeX file not created at {temp_tex_file}")
+            raise Exception("Pandoc did not create the expected LaTeX file")
+        print(f"   ✅ SUCCESS: LaTeX file created at {temp_tex_file}")
         # Step 4: Modify the generated LaTeX
+        print("\n[STEP 4/6] Enhancing LaTeX document...")
         with open(temp_tex_file, "r", encoding="utf-8") as f:
             tex = f.read()
         tex = tex.replace(
             r"\documentclass{article}",
             r"\documentclass[12pt]{extarticle}"
         )
         insert_packages = r"""\usepackage[a4paper, margin=1in]{geometry}
 \usepackage{xcolor}
 \usepackage{colortbl}
 \newcolumntype{L}[1]{>{\raggedright\arraybackslash}p{#1}}"""
         tex = tex.replace(r"\begin{document}", insert_packages + "\n\\begin{document}")
+        print("   ✅ SUCCESS: Enhanced document class and added packages")
+        # Step 5: Build enhanced LaTeX table for summary
         if summary_table_md:
+            print("\n[STEP 5/6] Building enhanced summary table...")
             summary_rows = parse_md_table(summary_table_md)
+            print(f"   📊 Parsed {len(summary_rows)} rows from summary table")
             summary_latex = r"""\section*{Examiner's Summary Report}
 \begin{center}
 \rowcolors{2}{gray!10}{white}
 """
             for row in summary_rows:
                 if len(row) >= 4:
                     feedback = row[3]
                     if not ('$' in feedback or '\\textcolor' in feedback):
                         feedback = feedback.replace('%', r'\%').replace('&', r'\&').replace('#', r'\#')
             summary_latex += "\\hrulefill\n\\vspace{1cm}\n\n"
             summary_latex += "\\newpage\n\n"
             tex = tex.replace(
                 r"\begin{document}",
                 r"\begin{document}" + "\n\n" + summary_latex
             )
+            print("   ✅ SUCCESS: Summary table with zebra striping injected at document top")
+        else:
+            print("\n[STEP 5/6] Skipping summary table (not found)")
         with open(temp_tex_file, "w", encoding="utf-8") as f:
             f.write(tex)
         # Step 6: Compile PDF with pdflatex
+        print("\n[STEP 6/6] Compiling PDF with pdflatex...")
         pdflatex_cmd = [
             "pdflatex",
             "-interaction=nonstopmode",
             temp_tex_file
         ]
+        print("   🔧 Running pdflatex (pass 1/2)...")
         result1 = subprocess.run(pdflatex_cmd, capture_output=True, check=False)
+        print("   🔧 Running pdflatex (pass 2/2)...")
         result2 = subprocess.run(pdflatex_cmd, capture_output=True, check=False)
         temp_pdf = temp_tex_file.replace(".tex", ".pdf")
         if not os.path.exists(temp_pdf):
+            print(f"   ❌ FAILED: PDF not created at {temp_pdf}")
             try:
                 stderr = result2.stderr.decode('utf-8', errors='replace')
             except:
                 stderr = str(result2.stderr)
             log_file = temp_tex_file.replace(".tex", ".log")
             if os.path.exists(log_file):
+                print(f"   📋 Checking LaTeX log file: {log_file}")
                 try:
                     with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
                         log_content = f.read()
                         error_lines = [line for line in log_content.split('\n') if '!' in line]
                         if error_lines:
+                            print(f"   ❌ LaTeX Errors found ({len(error_lines)} lines):")
+                            for err_line in error_lines[:10]:
+                                print(f"      {err_line}")
                             stderr += "\n\nLaTeX Errors:\n" + "\n".join(error_lines[:10])
+                except Exception as log_err:
+                    print(f"   ⚠️ Could not read log file: {log_err}")
+            raise Exception(f"pdflatex failed to create PDF. Error: {stderr[:1000]}")
+        print(f"   ✅ SUCCESS: PDF compiled at {temp_pdf}")
+        # Move output PDF to final filename
+        if os.path.exists(filename):
+            os.remove(filename)
+        os.rename(temp_pdf, filename)
+        print(f"   📦 Moved to final location: {filename}")
         # Clean up temporary files
+        print("\n[CLEANUP] Removing temporary files...")
+        cleaned_count = 0
         for ext in [".md", ".tex", ".aux", ".log", ".out"]:
             temp_file = base_name + ext
             if os.path.exists(temp_file):
                 os.remove(temp_file)
+                cleaned_count += 1
             for prefix in ["_input", "_temp"]:
                 temp_file = base_name + prefix + ext
                 if os.path.exists(temp_file):
                     os.remove(temp_file)
+                    cleaned_count += 1
+        print(f"   🧹 Cleaned up {cleaned_count} temporary files")
+        print("\n" + "="*60)
+        print("✅ PDF CONVERSION COMPLETED SUCCESSFULLY")
+        print(f"📄 Output file: {filename}")
+        print("="*60 + "\n")
         return filename
     except subprocess.CalledProcessError as e:
+        print(f"\n❌ SUBPROCESS ERROR: {e}")
         print(f"   STDOUT: {e.stdout}")
         print(f"   STDERR: {e.stderr}")
+        print("="*60 + "\n")
         raise Exception(f"PDF conversion failed: {e.stderr}")
     except FileNotFoundError as e:
+        print(f"\n❌ FILE NOT FOUND ERROR: {e}")
+        print("="*60)
+        print("⚠️ REQUIRED TOOLS MISSING")
+        print("Please install the following:")
+        print("  • pandoc")
+        print("  • texlive (or MiKTeX on Windows)")
+        print("  • texlive-latex-extra (for extarticle class)")
+        print("="*60 + "\n")
         raise Exception(
             "Pandoc or pdflatex not found. Please install:\n"
             "  - pandoc\n"
         )
     except Exception as e:
+        print(f"\n❌ UNEXPECTED ERROR: {e}")
         import traceback
         traceback.print_exc()
+        print("="*60 + "\n")
         raise
 def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
         })
     print("✅ Extracted grading marks for", len(grading_json["grading"]), "question blocks.")
     print(json.dumps(grading_json, indent=2))
+    return grading_json
def check_and_correct_total_marks(grading_text: str) -> tuple:
    """
    Verify the total in the Examiner's Summary Report against the sum of the
    per-question totals, and rewrite the summary total if they disagree.

    A question block is the text from a ``### Question <n>...`` heading up to
    its own ``**Total: a/b**`` line. A block is never allowed to run past the
    next ``### Question`` heading or the summary heading, so a question that
    lacks a total line is skipped instead of borrowing a total that belongs
    to a later section.

    Args:
        grading_text (str): The full grading markdown text.

    Returns:
        tuple: (corrected_text, calculated_awarded, calculated_possible,
        was_corrected). ``corrected_text`` is the input text unchanged unless
        a discrepancy was found and fixed. ``was_corrected`` is False when the
        summary section or its total cannot be located, or when no
        per-question totals could be parsed (in which case nothing is
        overwritten).
    """
    summary_header = "### Examiner's Summary Report"
    question_header = "### Question "

    print("\n" + "="*60)
    print("🔍 VERIFYING TOTAL MARKS IN SUMMARY REPORT")
    print("="*60)

    # Group 1 is the rest of the heading line (the question id). The tempered
    # middle part refuses to step over another question/summary heading, so
    # each total is attributed only to the block it physically sits in.
    question_block_pattern = re.compile(
        r"### Question (\d[^\n]*)"
        r"(?:(?!### Question |### Examiner's Summary Report)[\s\S])*?"
        r"\*\*Total: (\d+)/(\d+)\*\*"
    )

    # A list (not a dict) so that repeated ids are still counted and shown.
    question_marks = [
        (match.group(1).strip(), int(match.group(2)), int(match.group(3)))
        for match in question_block_pattern.finditer(grading_text)
    ]
    calculated_total_awarded = sum(awarded for _, awarded, _ in question_marks)
    calculated_total_possible = sum(possible for _, _, possible in question_marks)

    print(f"\n📊 Extracted marks from {len(question_marks)} questions:")
    for q_id, awarded, possible in question_marks:
        print(f"   Question {q_id}: {awarded}/{possible}")
    print(f"\n📈 Calculated totals from individual questions:")
    print(f"   Awarded: {calculated_total_awarded}")
    print(f"   Possible: {calculated_total_possible}")

    # With nothing parsed, the computed 0/0 is not evidence of an error in
    # the summary; overwriting would destroy a possibly correct total.
    if not question_marks:
        print("⚠️ Warning: No per-question totals found; leaving summary total unchanged.")
        return grading_text, calculated_total_awarded, calculated_total_possible, False

    # Find the summary report section
    summary_report_start = grading_text.find(summary_header)
    if summary_report_start == -1:
        print("⚠️ Warning: Could not find '### Examiner's Summary Report' section.")
        return grading_text, calculated_total_awarded, calculated_total_possible, False

    # The summary section ends where the next question block begins (or at
    # the end of the text), so a per-question total is never mistaken for
    # the overall total.
    summary_report_end = grading_text.find(
        question_header, summary_report_start + len(summary_header)
    )
    if summary_report_end == -1:
        summary_report_end = len(grading_text)

    summary_total_pattern = re.compile(r"(\*\*Total:\s*)(\d+)/(\d+)(\*\*)")
    # pos/endpos keep the match offsets absolute within grading_text.
    summary_match = summary_total_pattern.search(
        grading_text, summary_report_start, summary_report_end
    )
    if summary_match is None:
        print("⚠️ Warning: Could not find overall total in summary report.")
        return grading_text, calculated_total_awarded, calculated_total_possible, False

    original_summary_awarded = int(summary_match.group(2))
    original_summary_possible = int(summary_match.group(3))
    print(f"\n📋 Original summary report total: {original_summary_awarded}/{original_summary_possible}")

    # Check for discrepancies
    corrected_report_text = grading_text
    total_mismatch = False
    if calculated_total_awarded != original_summary_awarded:
        print(f"\n❌ DISCREPANCY FOUND in awarded marks!")
        print(f"   Calculated: {calculated_total_awarded}")
        print(f"   Reported: {original_summary_awarded}")
        total_mismatch = True
    if calculated_total_possible != original_summary_possible:
        print(f"\n❌ DISCREPANCY FOUND in possible marks!")
        print(f"   Calculated: {calculated_total_possible}")
        print(f"   Reported: {original_summary_possible}")
        total_mismatch = True

    if total_mismatch:
        print(f"\n🔧 CORRECTING summary total:")
        print(f"   FROM: {original_summary_awarded}/{original_summary_possible}")
        print(f"   TO:   {calculated_total_awarded}/{calculated_total_possible}")
        # Splice only the "a/b" digits of the summary total; everything else
        # in the document is left byte-for-byte intact.
        corrected_report_text = (
            grading_text[:summary_match.start(2)]
            + f"{calculated_total_awarded}/{calculated_total_possible}"
            + grading_text[summary_match.end(3):]
        )
        print("✅ Total marks corrected successfully!")
    else:
        print("\n✅ Total marks are CORRECT - no correction needed!")
    print("="*60 + "\n")
    return corrected_report_text, calculated_total_awarded, calculated_total_possible, total_mismatch
 # ---------------- MAPPING/IMPRINT HELPERS ----------------
 def ask_gemini_for_mapping_batch(image_paths, grading_json, expected_ids=None, rows=GRID_ROWS, cols=GRID_COLS):
         with open("debug_grading.md", "w", encoding="utf-8") as f:
             f.write(grading_text)
+        # Verify and correct total marks if needed
+        grading_text, calc_awarded, calc_possible, was_corrected = check_and_correct_total_marks(grading_text)
+        if was_corrected:
+            print("📝 Saving corrected grading to debug file: debug_grading_corrected.md")
+            with open("debug_grading_corrected.md", "w", encoding="utf-8") as f:
+                f.write(grading_text)
         base_name = os.path.splitext(os.path.basename(ans_path))[0]
         grading_pdf_path = save_as_pdf(grading_text, f"{base_name}_graded.pdf")
         print("📄 Grading PDF saved:", grading_pdf_path)