Spaces:

heerjtdev
/

feeedback

Running

App Files Files Community

heerjtdev committed 30 days ago

Commit

06eecf1

verified ·

1 Parent(s): 4a3866a

Update app.py

Browse files

Files changed (1) hide show

app.py +116 -147

app.py CHANGED Viewed

@@ -578,8 +578,6 @@
 # )
 import base64
 from PIL import Image
 import re
@@ -594,14 +592,17 @@ from typing import Optional, Tuple, List, Dict, Any, Union
 from ultralytics import YOLO
 import logging
 import gradio as gr
-import shutil
-import tempfile
 import io
 # ============================================================================
-# --- Global Patches and Setup ---
 # ============================================================================
 # Patch torch.load to prevent weights_only error with older models
 _original_torch_load = torch.load
 def patched_torch_load(*args, **kwargs):
@@ -609,12 +610,6 @@ def patched_torch_load(*args, **kwargs):
     return _original_torch_load(*args, **kwargs)
 torch.load = patched_torch_load
-logging.basicConfig(level=logging.WARNING)
-# ============================================================================
-# --- CONFIGURATION AND CONSTANTS ---
-# ============================================================================
 WEIGHTS_PATH = 'best.pt'
 SCALE_FACTOR = 2.0
@@ -628,7 +623,7 @@ try:
     ort_model = ORTModelForVision2Seq.from_pretrained(MODEL_NAME, use_cache=False)
     OCR_MODEL_LOADED = True
 except Exception as e:
-    logging.warning(f"OCR model loading failed (expected if dependencies are missing): {e}")
     processor = None
     ort_model = None
     OCR_MODEL_LOADED = False
@@ -707,10 +702,12 @@ def merge_overlapping_boxes(detections, iou_threshold):
             'coords': (merged_x1, merged_y1, merged_x2, merged_y2),
             'y1': merged_y1, 'class': current_class, 'conf': detections[i]['conf']
         })
     return merged_detections
 # ============================================================================
-# --- UTILITY FUNCTIONS (UPDATED) ---
 # ============================================================================
 def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
@@ -741,42 +738,59 @@ def crop_and_convert_to_pil(image: np.ndarray, bbox: Tuple[float, float, float,
     return crop_pil
-# --- NEW: Utility to convert PIL Image to Base64 (for OCR input) ---
 def pil_to_base64(img: Image.Image) -> str:
-    """Converts a PIL Image object to a Base64 encoded string (PNG format)."""
     buffer = io.BytesIO()
     img.save(buffer, format="PNG")
     return base64.b64encode(buffer.getvalue()).decode("utf-8")
-# --- UPDATED: run_yolo_detection_and_count to return a list of dictionaries with PIL images ---
 def run_yolo_detection_and_count(
         image: np.ndarray, model: YOLO, page_num: int,
         current_eq_count: int, current_fig_count: int
-) -> Tuple[int, int, List[Dict[str, Union[Image.Image, str]]], int, int]:
     """
-    Performs YOLO detection and returns page counts, detected items (as dicts
-    containing the PIL Image), and the updated total counters.
     """
     eq_counter = current_eq_count
     fig_counter = current_fig_count
-    page_equations = 0
-    page_figures = 0
-    # Change: detected_items now holds dictionaries: {'type', 'id', 'pil_image'}
-    detected_items: List[Dict[str, Union[Image.Image, str]]] = []
     yolo_detections = []
-    # ... (YOLO inference logic is the same)
     try:
         results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
         if results and results[0].boxes:
             for box in results[0].boxes.data.tolist():
                 x1, y1, x2, y2, conf, cls_id = box
                 cls_name = model.names[int(cls_id)]
                 if cls_name in TARGET_CLASSES:
                     yolo_detections.append({
                         'coords': (x1, y1, x2, y2),
@@ -784,108 +798,61 @@ def run_yolo_detection_and_count(
                         'conf': conf
                     })
     except Exception as e:
-        logging.error(f"YOLO inference failed on page {page_num}: {e}")
-        return 0, 0, [], eq_counter, fig_counter
     merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
     final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
     for det in final_detections:
         bbox = det["coords"]
         crop_pil = crop_and_convert_to_pil(image, bbox)
         if det["class"] == "equation":
             eq_counter += 1
-            page_equations += 1
-            detected_items.append({
-                "type": "equation",
-                "id": f"EQUATION{eq_counter}",
-                "pil_image": crop_pil,
-                "latex": "" # Placeholder for OCR result
-            })
         elif det["class"] == "figure":
             fig_counter += 1
-            page_figures += 1
-            detected_items.append({
-                "type": "figure",
-                "id": f"FIGURE{fig_counter}",
-                "pil_image": crop_pil,
-                "latex": "[FIGURE - No LaTeX]" # Figures don't get OCR
-            })
-    logging.warning(f"  -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
-    return page_equations, page_figures, detected_items, eq_counter, fig_counter
-def get_latex_from_base64(base64_string: str) -> str:
-    """
-    Performs the OCR conversion. Expects Base64 string input.
-    """
-    if not OCR_MODEL_LOADED:
-        return "[MODEL_ERROR: Model not initialized or failed to load]"
-    try:
-        # OCR logic (unchanged)
-        image_data = base64.b64decode(base64_string)
-        image = Image.open(io.BytesIO(image_data)).convert('RGB')
-        pixel_values = processor(images=image, return_tensors="pt").pixel_values
-        generated_ids = ort_model.generate(pixel_values)
-        raw_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
-        if not raw_text:
-            return "[OCR_WARNING: No formula found]"
-        latex = raw_text[0]
-        latex = re.sub(r'[\r\n]+', '', latex)
-        return latex
-    except Exception as e:
-        return f"[TR_OCR_ERROR: {e}]"
-# --- UNUSED ORIGINAL FUNCTIONS RETAINED FOR COMPLETENESS ---
-def extract_images_from_page_in_memory(page) -> Dict[str, str]:
-    # ... (body retained)
-    pass
-def embed_images_as_base64_in_memory(structured_data, detected_items):
-    # ... (body retained)
-    pass
-def crop_and_convert_to_base64(image: np.ndarray, bbox: Tuple[float, float, float, float]) -> str:
-    # ... (body retained)
-    pass
 # ============================================================================
-# --- MAIN DOCUMENT PROCESSING FUNCTION (UPDATED for OCR) ---
 # ============================================================================
 def run_single_pdf_preprocessing(
     pdf_path: str
-) -> Tuple[int, int, int, str, float, Dict[str, int], List[Tuple[Image.Image, str]]]:
     """
     Runs the pipeline, performs OCR, and returns final results.
     """
     start_time = time.time()
-    log_messages = []
-    # This will store all final extracted item dicts (image, ID, type, LATEX)
     all_extracted_items: List[Dict[str, Union[Image.Image, str]]] = []
-    equation_counts_per_page: Dict[int, int] = {}
     total_figure_count = 0
     total_equation_count = 0
     # 1. Validation and Model Loading (YOLO)
     t0 = time.time()
     if not os.path.exists(pdf_path):
         report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
@@ -893,24 +860,24 @@ def run_single_pdf_preprocessing(
     try:
         model = YOLO(WEIGHTS_PATH)
-        logging.warning(f"✅ Loaded YOLO model from: {WEIGHTS_PATH}")
     except Exception as e:
         report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
         return 0, 0, 0, report, time.time() - start_time, {}, []
     t1 = time.time()
-    log_messages.append(f"Model Loading Time: {t1-t0:.4f}s")
     # 2. PDF Loading (fitz)
     t2 = time.time()
     try:
         doc = fitz.open(pdf_path)
         total_pages = doc.page_count
-        logging.warning(f"✅ Opened PDF with {doc.page_count} pages")
     except Exception as e:
         report = f"❌ ERROR loading PDF file: {e}"
         return 0, 0, 0, report, time.time() - start_time, {}, []
     t3 = time.time()
-    log_messages.append(f"PDF Initialization Time: {t3-t2:.4f}s")
     mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
@@ -922,22 +889,19 @@ def run_single_pdf_preprocessing(
         page_num = page_num_0_based + 1
         # Render page to image for YOLO
-        # ... (image rendering logic retained)
         try:
             pix_start = time.time()
             pix = fitz_page.get_pixmap(matrix=mat)
             original_img = pixmap_to_numpy(pix)
             pix_time = time.time() - pix_start
         except Exception as e:
-            logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
             continue
         # YOLO Detection
         detect_start = time.time()
         (
-            page_equations,
-            page_figures,
-            page_extracted_items, # List of dicts: {'type', 'id', 'pil_image', 'latex'}
             total_equation_count,
             total_figure_count
         ) = run_yolo_detection_and_count(
@@ -947,44 +911,53 @@ def run_single_pdf_preprocessing(
             total_equation_count,
             total_figure_count
         )
-        # --- NEW: OCR/LaTeX Conversion for Equations ---
-        ocr_start = time.time()
         for item in page_extracted_items:
             if item["type"] == "equation":
-                # 1. Convert PIL image to Base64
-                b64_string = pil_to_base64(item["pil_image"])
-                # 2. Run OCR
                 item["latex"] = get_latex_from_base64(b64_string)
-                # OPTIONAL: Clean up large image data if memory is a concern
-                # del item["pil_image"]
-        ocr_time = time.time() - ocr_start
-        # Append all extracted item dictionaries
         all_extracted_items.extend(page_extracted_items)
-        detect_time = time.time() - detect_start
-        # Store the count in the dictionary (INT keys)
-        equation_counts_per_page[page_num] = page_equations
         page_total_time = time.time() - page_start_time
-        log_messages.append(f"Page {page_num} Time: Total={page_total_time:.4f}s (Render={pix_time:.4f}s, Detect={detect_time:.4f}s, OCR={ocr_time:.4f}s)")
     doc.close()
     t5 = time.time()
     detection_loop_time = t5 - t4
-    log_messages.append(f"Total Detection Loop Time ({total_pages} pages): {detection_loop_time:.4f}s")
     # 4. Final Report Generation and Gallery Formatting
     # Format the extracted items for the Gradio Gallery
     gallery_items: List[Tuple[Image.Image, str]] = []
-    # We will include the LATEX code as the image label in the gallery
-    # If the item is a Figure, the label is just the ID.
     for item in all_extracted_items:
         image_label = item["id"]
         if item["type"] == "equation":
@@ -995,10 +968,7 @@ def run_single_pdf_preprocessing(
     total_execution_time = t5 - start_time
-    # Convert integer keys to string keys for JSON serialization
-    equation_counts_per_page_str_keys: Dict[str, int] = {
-        str(k): v for k, v in equation_counts_per_page.items()
-    }
     report = (
         f"✅ **YOLO Counting & OCR Complete!**\n\n"
@@ -1007,23 +977,22 @@ def run_single_pdf_preprocessing(
         f"**3) Total Figures Detected:** **{total_figure_count}**\n"
         f"---\n"
         f"**4) Total Execution Time:** **{total_execution_time:.4f}s**\n"
-        f"### Detailed Step Timing\n"
-        f"```\n"
-        + "\n".join(log_messages) +
         f"\n```"
     )
-    return total_pages, total_equation_count, total_figure_count, report, total_execution_time, equation_counts_per_page_str_keys, gallery_items
 # ============================================================================
-# --- GRADIO INTERFACE FUNCTION & DEFINITION (Retained) ---
 # ============================================================================
-def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, Dict[str, int], List[Tuple[Image.Image, str]]]:
-    """
-    Gradio wrapper function to handle file upload and return results.
-    """
     if pdf_file is None:
         return "N/A", "N/A", "N/A", "Please upload a PDF file.", {}, []
@@ -1036,18 +1005,20 @@ def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, Dict[str, int], Li
             num_figures,
             report,
             total_time,
-            equation_counts_per_page,
             gallery_items
         ) = run_single_pdf_preprocessing(pdf_path)
-        return str(num_pages), str(num_equations), str(num_figures), report, equation_counts_per_page, gallery_items
     except Exception as e:
         error_msg = f"An unexpected error occurred: {e}"
-        logging.error(error_msg, exc_info=True)
-        return "Error", "Error", "Error", error_msg, {}, []
 if __name__ == "__main__":
@@ -1057,16 +1028,14 @@ if __name__ == "__main__":
     input_file = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])
-    # Outputs
     output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
     output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
     output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
-    output_report = gr.Markdown(label="Processing Summary and Timing")
-    # NEW OUTPUT: JSON component for structured data
-    output_page_counts = gr.JSON(label="Equation Count Per Page (Dictionary)")
-    # Gradio Gallery now shows the LaTeX code as the label
     output_gallery = gr.Gallery(
         label="Detected Items (with Extracted LaTeX)",
         columns=3,
@@ -1083,12 +1052,12 @@ if __name__ == "__main__":
             output_equations,
             output_figures,
             output_report,
-            output_page_counts,
             output_gallery
         ],
-        title="📊 YOLO Detection & Math OCR Pipeline",
         description=(
-            "Upload a PDF. YOLO detects equations, and the TrOCR model converts them to LaTeX."
         ),
     )

 # )
 import base64
 from PIL import Image
 import re
 from ultralytics import YOLO
 import logging
 import gradio as gr
 import io
+import json
 # ============================================================================
+# --- Global Setup and Configuration ---
 # ============================================================================
+# Configure logging to write to a string buffer for display in the report
+log_stream = io.StringIO()
+logging.basicConfig(level=logging.WARNING, stream=log_stream, format='%(levelname)s:%(message)s')
 # Patch torch.load to prevent weights_only error with older models
 _original_torch_load = torch.load
 def patched_torch_load(*args, **kwargs):
     return _original_torch_load(*args, **kwargs)
 torch.load = patched_torch_load
 WEIGHTS_PATH = 'best.pt'
 SCALE_FACTOR = 2.0
     ort_model = ORTModelForVision2Seq.from_pretrained(MODEL_NAME, use_cache=False)
     OCR_MODEL_LOADED = True
 except Exception as e:
+    logging.warning(f"OCR model loading failed: {e}")
     processor = None
     ort_model = None
     OCR_MODEL_LOADED = False
             'coords': (merged_x1, merged_y1, merged_x2, merged_y2),
             'y1': merged_y1, 'class': current_class, 'conf': detections[i]['conf']
         })
+    # This step ensures top-to-bottom reading order for sequential numbering (EQUATION1, EQUATION2, etc.)
+    merged_detections.sort(key=lambda d: d['y1'])
     return merged_detections
 # ============================================================================
+# --- UTILITY FUNCTIONS ---
 # ============================================================================
 def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
     return crop_pil
 def pil_to_base64(img: Image.Image) -> str:
+    """Encode a PIL image as PNG in an in-memory buffer and return the bytes as a Base64 text string (used as OCR input)."""
     buffer = io.BytesIO()
     img.save(buffer, format="PNG")
     return base64.b64encode(buffer.getvalue()).decode("utf-8")
+def get_latex_from_base64(base64_string: str) -> str:
+    """Decode a Base64 image, run it through the global OCR processor/model, and return the first decoded string with CR/LF removed; on failure a bracketed marker string is returned instead of raising."""
+    if not OCR_MODEL_LOADED:  # flag is set where the OCR model is loaded (True on success, False on failure)
+        return "[MODEL_ERROR: Model not loaded]"
+    try:
+        image_data = base64.b64decode(base64_string)
+        image = Image.open(io.BytesIO(image_data)).convert('RGB')
+        pixel_values = processor(images=image, return_tensors="pt").pixel_values
+        generated_ids = ort_model.generate(pixel_values)
+        raw_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
+        if not raw_text:  # batch_decode produced an empty list
+            return "[OCR_WARNING: No formula found]"
+        latex = raw_text[0]  # only one image was submitted, so take the first decoded entry
+        latex = re.sub(r'[\r\n]+', '', latex)  # drop line breaks so the formula is a single line
+        return latex
+    except Exception as e:  # best-effort: any failure is reported as text rather than propagated
+        return f"[TR_OCR_ERROR: {e}]"
 def run_yolo_detection_and_count(
         image: np.ndarray, model: YOLO, page_num: int,
         current_eq_count: int, current_fig_count: int
+) -> Tuple[List[Dict[str, Union[Image.Image, str, Tuple[float,...]]]], int, int]:
     """
+    Performs YOLO detection and returns a list of detected item dictionaries
+    and the updated total counters.
     """
     eq_counter = current_eq_count
     fig_counter = current_fig_count
+    detected_items: List[Dict[str, Union[Image.Image, str, Tuple[float,...]]]] = []
     yolo_detections = []
     try:
         results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
         if results and results[0].boxes:
             for box in results[0].boxes.data.tolist():
                 x1, y1, x2, y2, conf, cls_id = box
                 cls_name = model.names[int(cls_id)]
                 if cls_name in TARGET_CLASSES:
                     yolo_detections.append({
                         'coords': (x1, y1, x2, y2),
                         'conf': conf
                     })
     except Exception as e:
+        logging.error(f"ERROR: YOLO inference failed on page {page_num}: {e}")
+        return [], eq_counter, fig_counter
     merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
     final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
     for det in final_detections:
         bbox = det["coords"]
         crop_pil = crop_and_convert_to_pil(image, bbox)
+        item = {
+            "type": det["class"],
+            "coords": bbox,
+            "pil_image": crop_pil,
+        }
         if det["class"] == "equation":
             eq_counter += 1
+            item["id"] = f"EQUATION{eq_counter}"
+            item["latex"] = ""
         elif det["class"] == "figure":
             fig_counter += 1
+            item["id"] = f"FIGURE{fig_counter}"
+            item["latex"] = "[FIGURE - No LaTeX]"
+        detected_items.append(item)
+    return detected_items, eq_counter, fig_counter
 # ============================================================================
+# --- MAIN DOCUMENT PROCESSING FUNCTION (MODIFIED OUTPUT) ---
 # ============================================================================
+# The return type is updated to reflect the new structured output dictionary
 def run_single_pdf_preprocessing(
     pdf_path: str
+) -> Tuple[int, int, int, str, float, Dict[str, Union[int, str]], List[Tuple[Image.Image, str]]]:
     """
     Runs the pipeline, performs OCR, and returns final results.
     """
+    log_stream.truncate(0)
+    log_stream.seek(0)
     start_time = time.time()
     all_extracted_items: List[Dict[str, Union[Image.Image, str]]] = []
     total_figure_count = 0
     total_equation_count = 0
     # 1. Validation and Model Loading (YOLO)
+    # ... (Model loading logic retained)
     t0 = time.time()
     if not os.path.exists(pdf_path):
         report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
     try:
         model = YOLO(WEIGHTS_PATH)
+        logging.warning(f"INFO: Loaded YOLO model from: {WEIGHTS_PATH}")
     except Exception as e:
         report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
         return 0, 0, 0, report, time.time() - start_time, {}, []
     t1 = time.time()
+    logging.warning(f"INFO: Model Loading Time: {t1-t0:.4f}s")
     # 2. PDF Loading (fitz)
     t2 = time.time()
     try:
         doc = fitz.open(pdf_path)
         total_pages = doc.page_count
+        logging.warning(f"INFO: Opened PDF with {doc.page_count} pages")
     except Exception as e:
         report = f"❌ ERROR loading PDF file: {e}"
         return 0, 0, 0, report, time.time() - start_time, {}, []
     t3 = time.time()
+    logging.warning(f"INFO: PDF Initialization Time: {t3-t2:.4f}s")
     mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
         page_num = page_num_0_based + 1
         # Render page to image for YOLO
         try:
             pix_start = time.time()
             pix = fitz_page.get_pixmap(matrix=mat)
             original_img = pixmap_to_numpy(pix)
             pix_time = time.time() - pix_start
         except Exception as e:
+            logging.error(f"ERROR: Error converting page {page_num} to image: {e}. Skipping.")
             continue
         # YOLO Detection
         detect_start = time.time()
         (
+            page_extracted_items,
             total_equation_count,
             total_figure_count
         ) = run_yolo_detection_and_count(
             total_equation_count,
             total_figure_count
         )
+        detect_time = time.time() - detect_start
+        # --- OCR/LaTeX Conversion and Logging ---
+        ocr_total_time = 0
+        page_equations = 0
         for item in page_extracted_items:
             if item["type"] == "equation":
+                page_equations += 1
+                ocr_start = time.time()
+                b64_string = pil_to_base64(item["pil_image"])
                 item["latex"] = get_latex_from_base64(b64_string)
+                ocr_time = time.time() - ocr_start
+                ocr_total_time += ocr_time
+                logging.warning(f"LATEX: Page {page_num}, ID {item['id']} -> Time: {ocr_time:.4f}s, Formula: {item['latex'][:50]}...")
         all_extracted_items.extend(page_extracted_items)
+        page_figures = sum(1 for item in page_extracted_items if item["type"] == "figure")
         page_total_time = time.time() - page_start_time
+        logging.warning(f"SUMMARY: Page {page_num}: EQs={page_equations}, Figs={page_figures} | Page Time: {page_total_time:.4f}s (Detect={detect_time:.4f}s, OCR Total={ocr_total_time:.4f}s)")
     doc.close()
     t5 = time.time()
     detection_loop_time = t5 - t4
+    logging.warning(f"INFO: Total Detection and OCR Loop Time ({total_pages} pages): {detection_loop_time:.4f}s")
     # 4. Final Report Generation and Gallery Formatting
+    # --- NEW: Create the structured JSON output as requested by the user ---
+    structured_latex_output = {
+        "Total Pages": total_pages,
+        "Total Equations": total_equation_count,
+    }
+    for item in all_extracted_items:
+        if item["type"] == "equation":
+            # Map EQUATION ID to LaTeX code
+            structured_latex_output[item["id"]] = item["latex"]
     # Format the extracted items for the Gradio Gallery
     gallery_items: List[Tuple[Image.Image, str]] = []
     for item in all_extracted_items:
         image_label = item["id"]
         if item["type"] == "equation":
     total_execution_time = t5 - start_time
+    full_log = log_stream.getvalue()
     report = (
         f"✅ **YOLO Counting & OCR Complete!**\n\n"
         f"**3) Total Figures Detected:** **{total_figure_count}**\n"
         f"---\n"
         f"**4) Total Execution Time:** **{total_execution_time:.4f}s**\n"
+        f"### Full Processing Log\n"
+        f"```text\n"
+        f"{full_log}"
         f"\n```"
     )
+    # Return the new structured_latex_output instead of the page counts
+    return total_pages, total_equation_count, total_figure_count, report, total_execution_time, structured_latex_output, gallery_items
 # ============================================================================
+# --- GRADIO INTERFACE FUNCTION & DEFINITION (MODIFIED OUTPUT) ---
 # ============================================================================
+def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, Dict[str, Union[int, str]], List[Tuple[Image.Image, str]]]:
+    """Gradio wrapper function to handle file upload and return results."""
     if pdf_file is None:
         return "N/A", "N/A", "N/A", "Please upload a PDF file.", {}, []
             num_figures,
             report,
             total_time,
+            structured_latex_output, # Variable name changed to match the new output
             gallery_items
         ) = run_single_pdf_preprocessing(pdf_path)
+        return str(num_pages), str(num_equations), str(num_figures), report, structured_latex_output, gallery_items
     except Exception as e:
         error_msg = f"An unexpected error occurred: {e}"
+        logging.error(f"FATAL: {error_msg}", exc_info=True)
+        full_log = log_stream.getvalue()
+        error_report = f"❌ CRITICAL ERROR:\n{error_msg}\n\n### Log up to Failure\n```text\n{full_log}\n```"
+        return "Error", "Error", "Error", error_report, {}, []
 if __name__ == "__main__":
     input_file = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])
     output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
     output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
     output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
+    output_report = gr.Markdown(label="Processing Summary and Full Log")
+    # This JSON component now displays the structured output requested by the user
+    output_structured_latex = gr.JSON(label="Structured LaTeX Output (EQUATIONx : <latex code>)")
     output_gallery = gr.Gallery(
         label="Detected Items (with Extracted LaTeX)",
         columns=3,
             output_equations,
             output_figures,
             output_report,
+            output_structured_latex, # Updated component
             output_gallery
         ],
+        title="📊 YOLO Detection & Math OCR Pipeline (Structured Output)",
         description=(
+            "Upload a PDF. YOLO detects equations/figures, and OCR converts equations to LaTeX. See the Structured LaTeX Output panel for the requested format."
         ),
     )