Spaces:
Sleeping
Sleeping
| import os | |
| import sys | |
# Optional environment bootstrap: when APP_PATH is set, run from that
# directory and make it importable.
# NOTE(review): the original indentation was lost in this copy of the file;
# the cwd check and the sys.path check are reconstructed as siblings.
try:
    app_path = os.path.abspath(os.environ["APP_PATH"])
except KeyError:
    # APP_PATH is not configured; leave cwd and sys.path untouched.
    pass
else:
    if os.getcwd() != app_path:
        # fix sys.path for import
        os.chdir(app_path)
    if app_path not in sys.path:
        sys.path.append(app_path)
| import io | |
| import tempfile | |
| from typing import List | |
| import pypdfium2 | |
| import gradio as gr | |
| import requests | |
| from contextlib import suppress | |
| from surya.common.surya.schema import TaskNames | |
| from surya.models import load_predictors | |
| from surya.debug.draw import draw_polys_on_image | |
| from PIL import Image | |
| from surya.layout import LayoutResult | |
| from surya.settings import settings | |
| from surya.common.util import rescale_bbox, expand_bbox | |
| # --- Core Functions (Minimal changes required) --- | |
# PDF helpers
def open_pdf(pdf_file):
    """Open ``pdf_file`` as a ``pypdfium2.PdfDocument``.

    The caller owns the returned document and is responsible for closing it.
    """
    document = pypdfium2.PdfDocument(pdf_file)
    return document
def page_counter(pdf_file):
    """Return the number of pages in ``pdf_file``.

    The document is opened via ``open_pdf`` and is always closed again, even
    if reading its length fails.
    """
    doc = open_pdf(pdf_file)
    try:
        return len(doc)
    finally:
        doc.close()
def get_page_image(pdf_file, page_num, dpi=settings.IMAGE_DPI):
    """Render a single page of a PDF as an RGB PIL image.

    Args:
        pdf_file: Input accepted by ``open_pdf``.
        page_num: 1-based page number; it is converted to the 0-based index
            passed to the renderer.
        dpi: Target resolution. The renderer receives ``dpi / 72`` as its
            scale factor.

    Returns:
        The rendered page converted to mode ``"RGB"``.
    """
    doc = open_pdf(pdf_file)
    try:
        renderer = doc.render(
            pypdfium2.PdfBitmap.to_pil,
            page_indices=[page_num - 1],
            scale=dpi / 72,
        )
        # Materialize and convert the page while the document is still open.
        rendered = list(renderer)[0]
        return rendered.convert("RGB")
    finally:
        # Release the document even when rendering fails (e.g. bad page index).
        doc.close()
def get_uploaded_image(in_file):
    """Load an uploaded image file and return it as an RGB PIL image.

    ``convert`` returns a new image, so the handle opened by ``Image.open``
    can be closed as soon as the conversion is done instead of being left
    for the garbage collector.
    """
    with Image.open(in_file) as source:
        return source.convert("RGB")
# Layout detection restricted to a subset of labels (Equation and Figure by default)
def focused_layout_detection(
    img, labels=("Equation", "Figure")
) -> tuple[Image.Image, LayoutResult]:
    """Run layout prediction on ``img`` and keep only the requested labels.

    Args:
        img: PIL image of the page to analyse.
        labels: Layout labels to keep. Defaults to equations and figures.

    Returns:
        A ``(annotated_image, prediction)`` pair. ``annotated_image`` is a
        copy of ``img`` with the kept polygons drawn on it. ``prediction`` is
        the predictor's result for the page with its ``bboxes`` replaced by
        the filtered list (the result object is modified in place).
    """
    wanted = frozenset(labels)

    # The layout predictor takes a batch; we send one image and take its result.
    pred = predictors["layout"]([img])[0]

    kept = [box for box in pred.bboxes if box.label in wanted]
    pred.bboxes = kept

    captions = []
    for box in kept:
        # NOTE(review): top_k is assumed to map label -> score; tolerate a
        # missing entry rather than failing the whole page.
        score = (box.top_k or {}).get(box.label)
        if score is None:
            captions.append(f"{box.label}-{box.position}")
        else:
            captions.append(f"{box.label}-{box.position}-{round(score, 2)}")

    # Draw on a copy so the caller's image is left untouched.
    layout_img = draw_polys_on_image(
        [box.polygon for box in kept],
        img.copy(),
        labels=captions,
        label_font_size=18,
    )
    return layout_img, pred
# Load the Surya predictors once at import time. focused_layout_detection
# looks up the "layout" entry of this mapping on every call.
predictors = load_predictors()
# --- Gradio interface ---
# NOTE(review): the original indentation was lost in this copy of the file; the
# Row/Column nesting below is reconstructed from the order of the statements.
with gr.Blocks(title="Surya Equation/Figure Detector") as demo:
    gr.Markdown("""
    # Surya Equation and Figure Detection
    This application uses Surya OCR's layout analysis model to **specifically detect and locate Equations and Figures** within a document page.
    The output provides an image with bounding boxes drawn, and the raw JSON bounding box information for the detected elements.
    Find the original project [here](https://github.com/VikParuchuri/surya).
    """)

    with gr.Row():
        with gr.Column():
            in_file = gr.File(label="PDF file or image:", file_types=[".pdf", ".png", ".jpg", ".jpeg", ".gif", ".webp"])
            in_num = gr.Slider(label="Page number", minimum=1, maximum=100, value=1, step=1)
            in_img = gr.Image(label="Select page of Image", type="pil", sources=None)
            detection_btn = gr.Button("Run Equation and Figure Detection")
        with gr.Column():
            result_img = gr.Gallery(label="Result image: Detected Equations and Figures", show_label=True,
                                    elem_id="gallery", columns=[1], rows=[1], object_fit="contain", height="auto")
            # Let the gallery grow with the rendered page instead of scrolling.
            gr.HTML("""
            <style>
            #gallery {
            height: auto !important;
            max-height: none !important;
            overflow: visible !important;
            }
            #gallery .gallery-item {
            flex-direction: column !important;
            }
            #gallery .gallery-item img {
            width: 100% !important;
            height: auto !important;
            object-fit: contain !important;
            }
            </style>
            """)
            result_json = gr.JSON(label="Result JSON (Bounding Box Data)")

    def show_image(file, num=1):
        """Preview the selected page of a PDF, or the uploaded image itself.

        Args:
            file: Path of the uploaded file, or ``None`` when nothing has been
                uploaded yet.
            num: 1-based page number requested through the slider.

        Returns:
            Updates for ``[in_num, in_img]``: the slider is shown (with its
            maximum set to the page count) for PDFs and hidden for images.
        """
        if not file:
            # The slider can be moved before any upload; nothing to render.
            return [gr.update(), gr.update()]

        # Compare case-insensitively so "SCAN.PDF" is treated as a PDF too.
        if file.lower().endswith(".pdf"):
            count = page_counter(file)
            # Keep the requested page inside the document, e.g. when a shorter
            # PDF replaces a longer one while the slider still shows a high page.
            page = min(max(int(num), 1), count)
            img = get_page_image(file, page, settings.IMAGE_DPI)
            return [
                gr.update(visible=True, maximum=count),
                gr.update(value=img),
            ]

        img = get_uploaded_image(file)
        return [
            gr.update(visible=False),
            gr.update(value=img),
        ]

    in_file.upload(
        fn=show_image,
        inputs=[in_file],
        outputs=[in_num, in_img],
    )
    in_num.change(
        fn=show_image,
        inputs=[in_file, in_num],
        outputs=[in_num, in_img],
    )

    def run_focused_detection(pil_image):
        """Detect equations and figures on the previewed page.

        Args:
            pil_image: The PIL image currently held by ``in_img``.

        Returns:
            Updates for ``(result_img, result_json)``: the annotated page and
            the prediction dumped to a dict without its segmentation map.

        Raises:
            gr.Error: If no page image has been loaded yet.
        """
        if pil_image is None:
            raise gr.Error("Please upload a PDF or image before running detection.")

        # Best-effort usage counter: it must never fail or stall a detection,
        # so errors are suppressed and the request is bounded by a timeout.
        with suppress(Exception):
            requests.get(
                "https://counterapi.com/api/xiaoyao9184.github.com/view/docker-surya",
                timeout=5,
            )

        layout_img, pred = focused_layout_detection(pil_image)

        # Exclude the large segmentation map from the JSON output.
        layout_json = pred.model_dump(exclude={"segmentation_map"})
        num_boxes = len(layout_json.get('bboxes', []))

        return (
            gr.update(label=f"Result image: {num_boxes} Equations/Figures detected", value=[layout_img], rows=[1], height=layout_img.height),
            gr.update(label=f"Result JSON: {num_boxes} Equations/Figures detected", value=layout_json),
        )

    detection_btn.click(
        fn=run_focused_detection,
        inputs=[in_img],
        outputs=[result_img, result_json],
    )
# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()