Spaces:

heerjtdev
/

feeedback

Running

App Files Files Community

heerjtdev committed on Dec 8, 2025

Commit

e12f847

verified ·

1 Parent(s): a5f01d2

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -0

app.py CHANGED Viewed

@@ -44,6 +44,21 @@ SCALE_FACTOR = 2.0
 OUTPUT_DIR = os.path.join(tempfile.gettempdir(), "yolo_extracted_regions")  # "yolo_extracted_regions" folder path under the system temp directory
 # Detection parameters
 CONF_THRESHOLD = 0.2  # presumably the minimum detection confidence that is kept -- TODO confirm against the detection code
 TARGET_CLASSES = ['figure', 'equation']  # presumably the detector class labels of interest -- TODO confirm
@@ -203,6 +218,26 @@ def run_yolo_detection_and_count(
@@ -311,6 +346,72 @@ def run_yolo_detection_and_count(
     return page_equations, page_figures, saved_images

 OUTPUT_DIR = os.path.join(tempfile.gettempdir(), "yolo_extracted_regions")  # "yolo_extracted_regions" folder path under the system temp directory
+MODEL_NAME = 'breezedeus/pix2text-mfr-1.5'  # model identifier handed to both from_pretrained() calls below
+processor = TrOCRProcessor.from_pretrained(MODEL_NAME)  # built once by a module-level statement, i.e. when the module is imported
+ort_model = ORTModelForVision2Seq.from_pretrained(MODEL_NAME, use_cache=False)  # built once at import; presumably used by the equation-to-LaTeX step -- TODO confirm
 # Detection parameters
 CONF_THRESHOLD = 0.2  # presumably the minimum detection confidence that is kept -- TODO confirm against the detection code
 TARGET_CLASSES = ['figure', 'equation']  # presumably the detector class labels of interest -- TODO confirm
+def extract_images_from_page_in_memory(page) -> Dict[str, str]:
+    """
+    Extract images from a page and return:
+    { "EQUATION1": base64_string, "FIGURE1": base64_string }
+    """
+    image_map = {}
+    image_list = page.get_images(full=True)
+    for idx, img in enumerate(image_list, start=1):
+        xref = img[0]
+        base = page.parent.extract_image(xref)
+        image_bytes = base["image"]
+        base64_img = base64.b64encode(image_bytes).decode("utf-8")
+        # Convention: first image = FIGURE1, second image = EQUATION1 etc
+        # You can tune this if needed
+        image_map[f"FIGURE{idx}"] = base64_img
+    return image_map
     return page_equations, page_figures, saved_images
+def embed_images_as_base64_in_memory(structured_data: List[Dict[str, Any]], pdf_doc) -> List[Dict[str, Any]]:
+    print("\n" + "="*80)
+    print("--- IN-MEMORY IMAGE + EQUATION TO LATEX PIPELINE ---")
+    print("="*80)
+    if not structured_data:
+        return []
+    # Build global image map from all pages (in memory only)
+    full_image_lookup = {}
+    for page_index in range(len(pdf_doc)):
+        page = pdf_doc[page_index]
+        page_images = extract_images_from_page_in_memory(page)
+        for tag, base64_img in page_images.items():
+            full_image_lookup[tag] = base64_img
+    print(f" -> Found {len(full_image_lookup)} total in-memory images.")
+    tag_regex = re.compile(r'(figure|equation)(\d+)', re.IGNORECASE)
+    final_structured_data = []
+    for item in structured_data:
+        text_fields = [
+            item.get('question', ''),
+            item.get('passage', ''),
+            item.get('new_passage', '')
+        ]
+        if 'options' in item:
+            for opt in item['options'].values():
+                text_fields.append(opt)
+        unique_tags = set()
+        for text in text_fields:
+            if not text:
+                continue
+            for match in tag_regex.finditer(text):
+                unique_tags.add(match.group(0).upper())
+        for tag in sorted(unique_tags):
+            base_key = tag.lower().replace(' ', '')
+            if tag not in full_image_lookup:
+                item[base_key] = "[MISSING_IMAGE]"
+                continue
+            base64_img = full_image_lookup[tag]
+            if "EQUATION" in tag:
+                latex = get_latex_from_base64(base64_img)
+                item[base_key] = latex
+                print(f" ✅ {tag} → LaTeX")
+            elif "FIGURE" in tag:
+                item[base_key] = base64_img
+                print(f" ✅ {tag} → Base64")
+        final_structured_data.append(item)
+    print("✅ In-memory embedding completed")
+    return final_structured_data