Spaces:

Varshithdharmajv
/

mvm2-math-verification

Sleeping

App Files Files Community

Varshithdharmajv committed on 13 days ago

Commit

17ff84e

verified ·

1 Parent(s): a84c47e

Upload ocr_module.py with huggingface_hub

Browse files

Files changed (1) hide show

ocr_module.py +25 -6

ocr_module.py CHANGED Viewed

@@ -1,9 +1,8 @@
-import os
-import json
-import random
-import re
-from typing import Dict, List, Any
 from PIL import Image
 # MVM2 Configuration for OCR Confidence Weights
 CRITICAL_OPERATORS = ["\\int", "\\sum", "=", "\\frac", "+", "-", "*", "\\times", "\\div"]
@@ -90,6 +89,17 @@ class MVM2OCREngine:
         except Exception as e:
             print(f"[OCR] Warning: Pix2Text unavailable ({e}). Using simulation mode.")
     def process_image(self, image_path: str) -> Dict[str, Any]:
         """Full OCR pipeline with CJK filtering and confidence scoring."""
         if not os.path.exists(image_path):
@@ -131,10 +141,19 @@ class MVM2OCREngine:
         # Final CJK cleanup pass (catches anything that slipped through)
         raw_latex = clean_latex_output(raw_latex)
         ocr_conf = calculate_weighted_confidence(raw_latex)
         return {
             "latex_output": raw_latex,
             "weighted_confidence": ocr_conf,
-            "backend": "pix2text" if self.model_loaded else "simulation"
         }

 from PIL import Image
+import sys
+# Handwriting transcription model checkpoint, bundled under ./handwritten-math-transcription (resolved from the current working directory)
+MODEL_PATH = os.path.join(os.getcwd(), "handwritten-math-transcription", "checkpoints", "model_v3_0.pth")
 # MVM2 Configuration for OCR Confidence Weights
 CRITICAL_OPERATORS = ["\\int", "\\sum", "=", "\\frac", "+", "-", "*", "\\times", "\\div"]
         except Exception as e:
             print(f"[OCR] Warning: Pix2Text unavailable ({e}). Using simulation mode.")
+        self.transcriber = None
+        try:
+            from handwriting_transcriber import HandwritingTranscriber
+            if os.path.exists(MODEL_PATH):
+                self.transcriber = HandwritingTranscriber(model_path=MODEL_PATH)
+                print(f"[OCR] HandwritingTranscriber loaded with model: {MODEL_PATH}")
+            else:
+                print(f"[OCR] Warning: Handwriting model not found at {MODEL_PATH}")
+        except Exception as e:
+            print(f"[OCR] Warning: HandwritingTranscriber unavailable ({e})")
     def process_image(self, image_path: str) -> Dict[str, Any]:
         """Full OCR pipeline with CJK filtering and confidence scoring."""
         if not os.path.exists(image_path):
         # Final CJK cleanup pass (catches anything that slipped through)
         raw_latex = clean_latex_output(raw_latex)
+        # If no math detected by Pix2Text, try HandwritingTranscriber for InkML
+        if (not raw_latex.strip() or "No math content" in raw_latex) and self.transcriber and image_path.endswith('.inkml'):
+            try:
+                raw_latex, _ = self.transcriber.transcribe_inkml(image_path)
+                print(f"[OCR] Used HandwritingTranscriber for InkML: {raw_latex}")
+            except Exception as e:
+                print(f"[OCR] HandwritingTranscriber error: {e}")
         ocr_conf = calculate_weighted_confidence(raw_latex)
         return {
             "latex_output": raw_latex,
             "weighted_confidence": ocr_conf,
+            "backend": "handwriting" if self.transcriber and image_path.endswith('.inkml') else ("pix2text" if self.model_loaded else "simulation")
         }