File size: 6,312 Bytes
3d0c98c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
import os
import sys

# Optional environment setup: when APP_PATH is set, run from that directory
# and make sure it is on the import path.
if os.environ.get("APP_PATH") is not None:
    app_path = os.path.abspath(os.environ["APP_PATH"])
    # Switch the working directory to APP_PATH unless we are already there.
    if os.getcwd() != app_path:
        os.chdir(app_path)
    # Append APP_PATH to sys.path so modules located there can be imported.
    if app_path not in sys.path:
        sys.path.append(app_path)

import io
import tempfile
from typing import List

import pypdfium2
import gradio as gr
import requests
from contextlib import suppress

from surya.common.surya.schema import TaskNames
from surya.models import load_predictors

from surya.debug.draw import draw_polys_on_image
from PIL import Image
from surya.layout import LayoutResult
from surya.settings import settings
from surya.common.util import rescale_bbox, expand_bbox


# --- Core Functions (Minimal changes required) ---

# PDF and image loading helpers: open a PDF, count its pages, render a page, load an uploaded image
def open_pdf(pdf_file):
    """Open ``pdf_file`` with pypdfium2 and return the ``PdfDocument``.

    The document is returned open; the callers in this file close it
    themselves once they are done with it.
    """
    document = pypdfium2.PdfDocument(pdf_file)
    return document

def page_counter(pdf_file):
    """Return the number of pages in ``pdf_file``.

    Args:
        pdf_file: PDF input, passed unchanged to ``open_pdf``.

    Returns:
        The page count reported by ``len()`` on the opened document.
    """
    doc = open_pdf(pdf_file)
    # Close the document even if reading the page count fails, so the
    # underlying PDF handle is never leaked.
    try:
        return len(doc)
    finally:
        doc.close()

def get_page_image(pdf_file, page_num, dpi=settings.IMAGE_DPI):
    """Render a single page of a PDF as an RGB PIL image.

    Args:
        pdf_file: PDF input, passed unchanged to ``open_pdf``.
        page_num: 1-based page number. It is coerced to ``int`` (UI sliders
            may deliver a float) and converted to a 0-based page index.
        dpi: Target resolution; the render scale is ``dpi / 72``.

    Returns:
        The rendered page converted to RGB mode.
    """
    doc = open_pdf(pdf_file)
    # try/finally guarantees the document is closed when rendering or the
    # RGB conversion raises (e.g. for an out-of-range page).
    try:
        renderer = doc.render(
            pypdfium2.PdfBitmap.to_pil,
            page_indices=[int(page_num) - 1],
            scale=dpi / 72,
        )
        page_image = list(renderer)[0]
        # Convert before the document is closed, as the original code did.
        return page_image.convert("RGB")
    finally:
        doc.close()

def get_uploaded_image(in_file):
    """Load an uploaded image file and return it as an RGB PIL image.

    Args:
        in_file: Path or file object accepted by ``PIL.Image.open``.

    Returns:
        A new image in RGB mode.
    """
    # Image.open is lazy and keeps the file open; the context manager closes
    # it as soon as the RGB conversion (which returns a separate image
    # object) has been produced.
    with Image.open(in_file) as source:
        return source.convert("RGB")

# Modified layout_detection to filter for Equation and Figure
def focused_layout_detection(img) -> "tuple[Image.Image, LayoutResult]":
    """Run layout detection on ``img`` and keep only Equation/Figure boxes.

    Args:
        img: Page image. It is passed to the layout predictor and a copy of
            it is used as the drawing canvas.

    Returns:
        ``(layout_img, pred)``: ``layout_img`` is a copy of ``img`` with the
        kept polygons and their labels drawn on it; ``pred`` is the
        predictor's result for ``img`` with ``pred.bboxes`` replaced by the
        filtered list.
    """
    wanted_labels = ("Equation", "Figure")

    # The predictor takes a batch; we send one image and take its result.
    pred = predictors["layout"]([img])[0]

    kept_boxes = [box for box in pred.bboxes if box.label in wanted_labels]
    # Overwrite the result's boxes so callers that serialize ``pred`` only
    # see the filtered elements.
    pred.bboxes = kept_boxes

    polygons = [box.polygon for box in kept_boxes]
    # Caption format: <label>-<position>-<top_k value for that label, 2 dp>.
    labels = [
        f"{box.label}-{box.position}-{round(box.top_k[box.label], 2)}"
        for box in kept_boxes
    ]

    layout_img = draw_polys_on_image(
        polygons, img.copy(), labels=labels, label_font_size=18
    )
    return layout_img, pred


# Load the Surya predictors once, at import time. focused_layout_detection
# reads the "layout" entry from this module-level object on each call.
predictors = load_predictors()


# --- Gradio Interface (Significantly simplified) ---

# UI layout: one row with two columns. The first column holds the inputs
# (file upload, page slider, page preview, run button); the second column
# holds the outputs (annotated image gallery and JSON).
with gr.Blocks(title="Surya Equation/Figure Detector") as demo:
    # Introductory text rendered at the top of the page.
    gr.Markdown("""

    # Surya Equation and Figure Detection



    This application uses Surya OCR's layout analysis model to **specifically detect and locate Equations and Figures** within a document page.



    The output provides an image with bounding boxes drawn, and the raw JSON bounding box information for the detected elements.



    Find the original project [here](https://github.com/VikParuchuri/surya).

    """)

    with gr.Row():
        # Input column.
        with gr.Column():
            in_file = gr.File(label="PDF file or image:", file_types=[".pdf", ".png", ".jpg", ".jpeg", ".gif", ".webp"])
            # maximum=100 is only the initial value; show_image replaces it
            # with the real page count when a PDF is loaded.
            in_num = gr.Slider(label="Page number", minimum=1, maximum=100, value=1, step=1)
            # Preview of the selected page/image; this PIL image is what the
            # detection button sends to the model.
            in_img = gr.Image(label="Select page of Image", type="pil", sources=None)

            # The single action button of this app.
            detection_btn = gr.Button("Run Equation and Figure Detection")
            
        # Output column.
        with gr.Column():
            # elem_id="gallery" is the hook used by the CSS block below.
            result_img = gr.Gallery(label="Result image: Detected Equations and Figures", show_label=True, 
                elem_id="gallery", columns=[1], rows=[1], object_fit="contain", height="auto")

            # Inline CSS overrides for the element with id "gallery" and the
            # gallery items/images inside it.
            gr.HTML("""

            <style>

            #gallery {

                height: auto !important;

                max-height: none !important;

                overflow: visible !important;

            }

            #gallery .gallery-item {

                flex-direction: column !important;

            }

            #gallery .gallery-item img {

                width: 100% !important;

                height: auto !important;

                object-fit: contain !important;

            }

            </style>

            """)
            # Receives the serialized detection result.
            result_json = gr.JSON(label="Result JSON (Bounding Box Data)")

        # Page Loading Logic (keep this)
        def show_image(file, num=1):
            """Load the preview image for the uploaded file and set up the slider.

            Args:
                file: Path of the uploaded file, or ``None``/empty when no
                    file is present.
                num: 1-based page number to render for PDFs; unused for
                    plain images.

            Returns:
                A two-item list of updates, in order: the page slider and
                the preview image.
            """
            # The slider can change while no file is loaded; leave both
            # components untouched instead of failing on ``None.endswith``.
            if not file:
                return [gr.update(), gr.update()]

            # Case-insensitive check so "SCAN.PDF" is rendered as a PDF
            # rather than being handed to the image loader.
            if file.lower().endswith(".pdf"):
                count = page_counter(file)
                # Keep the requested page inside the document's range.
                page = min(max(int(num), 1), count)
                img = get_page_image(file, page, settings.IMAGE_DPI)
                return [
                    gr.update(visible=True, maximum=count),
                    gr.update(value=img),
                ]

            # Single images have no pages, so the slider is hidden.
            img = get_uploaded_image(file)
            return [
                gr.update(visible=False),
                gr.update(value=img),
            ]

        # A new upload passes only the file, so show_image uses its default
        # num=1 and updates the slider and the preview.
        in_file.upload(
            fn=show_image,
            inputs=[in_file],
            outputs=[in_num, in_img],
        )
        # Changing the page slider re-runs show_image with the current file
        # and the selected page number.
        in_num.change(
            fn=show_image,
            inputs=[in_file, in_num],
            outputs=[in_num, in_img],
        )

        # Run Focused Detection
        def run_focused_detection(pil_image):
            """Detect Equations/Figures on the preview image and build the outputs.

            Args:
                pil_image: Image currently shown in the preview component,
                    or ``None`` when nothing has been loaded.

            Returns:
                A pair of updates: the result gallery (annotated image) and
                the JSON component (serialized detection result).

            Raises:
                gr.Error: If no image has been loaded yet.
            """
            # Fail with a readable message instead of an opaque crash inside
            # the predictor when the button is pressed without an image.
            if pil_image is None:
                raise gr.Error("Please upload a PDF or image before running detection.")

            # Best-effort usage counter: any failure is deliberately ignored.
            # The timeout keeps an unresponsive counter service from blocking
            # the request indefinitely.
            with suppress(Exception):
                requests.get(
                    "https://counterapi.com/api/xiaoyao9184.github.com/view/docker-surya",
                    timeout=5,
                )

            layout_img, pred = focused_layout_detection(pil_image)
            # Exclude the large segmentation map from the JSON output.
            layout_json = pred.model_dump(exclude=["segmentation_map"])

            # Number of boxes that survived the Equation/Figure filter.
            num_boxes = len(layout_json.get('bboxes', []))

            return (
                gr.update(label=f"Result image: {num_boxes} Equations/Figures detected", value=[layout_img], rows=[1], height=layout_img.height),
                gr.update(label=f"Result JSON: {num_boxes} Equations/Figures detected", value=layout_json)
            )
        
        # Send the current preview image to run_focused_detection and route
        # its two results to the gallery and the JSON view.
        detection_btn.click(
            fn=run_focused_detection,
            inputs=[in_img],
            outputs=[result_img, result_json]
        )


# Launch the demo only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    demo.launch()