# NOTE: The text that originally preceded the code here was Hugging Face
# Spaces file-viewer residue, not part of the program: Space status
# "Sleeping", file size 6,312 bytes, commit 3d0c98c, and a line-number
# gutter (1-188).
import os
import sys
# Optional environment setup: when APP_PATH is set, run from that directory
# and make it importable.
_app_path_env = os.environ.get("APP_PATH")
if _app_path_env is not None:
    app_path = os.path.abspath(_app_path_env)
    # Change the working directory only when we are not already there.
    if app_path != os.getcwd():
        os.chdir(app_path)
    # Append (not prepend) so existing sys.path entries keep priority.
    if app_path not in sys.path:
        sys.path.append(app_path)
import io
import tempfile
from typing import List
import pypdfium2
import gradio as gr
import requests
from contextlib import suppress
from surya.common.surya.schema import TaskNames
from surya.models import load_predictors
from surya.debug.draw import draw_polys_on_image
from PIL import Image
from surya.layout import LayoutResult
from surya.settings import settings
from surya.common.util import rescale_bbox, expand_bbox
# --- Core functions ---
# PDF and image loading helpers
def open_pdf(pdf_file):
    """Open ``pdf_file`` with pypdfium2 and return the document object.

    The returned ``pypdfium2.PdfDocument`` is not closed here; the caller is
    expected to call ``close()`` on it when done.
    """
    document = pypdfium2.PdfDocument(pdf_file)
    return document
def page_counter(pdf_file):
    """Return the number of pages in ``pdf_file``.

    The document is opened via ``open_pdf`` only for the duration of the
    call and is closed even if reading the page count raises.
    """
    doc = open_pdf(pdf_file)
    try:
        return len(doc)
    finally:
        # Always release the document handle, including on error.
        doc.close()
def get_page_image(pdf_file, page_num, dpi=settings.IMAGE_DPI):
    """Render a single PDF page and return it as an RGB PIL image.

    Args:
        pdf_file: Input passed through to ``open_pdf``.
        page_num: 1-based page number; converted to a 0-based page index.
        dpi: Rendering resolution. It is turned into a render scale of
            ``dpi / 72`` (presumably 72 being the PDF points-per-inch base).

    Returns:
        The rendered page converted to mode ``"RGB"``.
    """
    doc = open_pdf(pdf_file)
    try:
        renderer = doc.render(
            pypdfium2.PdfBitmap.to_pil,
            page_indices=[page_num - 1],
            scale=dpi / 72,
        )
        # Exactly one page index was requested, so take the first result.
        # The renderer is consumed while the document is still open.
        page_image = list(renderer)[0]
        return page_image.convert("RGB")
    finally:
        # Close the document even when rendering or conversion fails.
        doc.close()
def get_uploaded_image(in_file):
    """Load an uploaded image file and return it as an RGB PIL image.

    The source image is opened in a ``with`` block so its underlying file is
    released as soon as the RGB copy has been produced, instead of staying
    open until the original image object is garbage collected.
    """
    with Image.open(in_file) as source_image:
        # convert() returns a new image, so it stays valid after the
        # source image is closed.
        return source_image.convert("RGB")
# Layout detection restricted to a chosen set of labels
# (Equation and Figure by default).
def focused_layout_detection(
    img, target_labels=("Equation", "Figure")
) -> "tuple[Image.Image, LayoutResult]":
    """Run layout detection on ``img`` and keep only the requested labels.

    Args:
        img: PIL image of the page to analyse.
        target_labels: Iterable of layout label strings to keep. Defaults to
            ``("Equation", "Figure")``, matching the original behaviour.

    Returns:
        A ``(layout_img, pred)`` tuple: a copy of ``img`` with the kept
        polygons and their labels drawn on it, and the layout prediction
        whose ``bboxes`` has been replaced in place by the filtered list.
    """
    wanted = frozenset(target_labels)

    # The layout predictor takes a batch of images; we send one and take
    # the single result.
    pred = predictors["layout"]([img])[0]

    # Keep only the boxes whose label was requested, and store the filtered
    # list back on the prediction so downstream JSON output matches the image.
    kept_boxes = [box for box in pred.bboxes if box.label in wanted]
    pred.bboxes = kept_boxes

    # Caption format: "<label>-<position>-<score for that label, 2 decimals>".
    polygons = [box.polygon for box in kept_boxes]
    labels = [
        f"{box.label}-{box.position}-{round(box.top_k[box.label], 2)}"
        for box in kept_boxes
    ]

    # Draw on a copy so the caller's image is left untouched.
    layout_img = draw_polys_on_image(
        polygons, img.copy(), labels=labels, label_font_size=18
    )
    return layout_img, pred
# Load the Surya predictors once, at import time. focused_layout_detection
# looks up the layout model from this mapping via predictors["layout"].
predictors = load_predictors()
# --- Gradio interface ---

# Introductory text shown at the top of the page.
_INTRO_MARKDOWN = """
# Surya Equation and Figure Detection
This application uses Surya OCR's layout analysis model to **specifically detect and locate Equations and Figures** within a document page.
The output provides an image with bounding boxes drawn, and the raw JSON bounding box information for the detected elements.
Find the original project [here](https://github.com/VikParuchuri/surya).
"""

# CSS overrides for the result gallery (element id "gallery") so the
# annotated page is shown at its full height instead of being clipped.
_GALLERY_CSS = """
<style>
#gallery {
height: auto !important;
max-height: none !important;
overflow: visible !important;
}
#gallery .gallery-item {
flex-direction: column !important;
}
#gallery .gallery-item img {
width: 100% !important;
height: auto !important;
object-fit: contain !important;
}
</style>
"""

with gr.Blocks(title="Surya Equation/Figure Detector") as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        # Left column: input file, page selector, page preview, run button.
        with gr.Column():
            in_file = gr.File(
                label="PDF file or image:",
                file_types=[".pdf", ".png", ".jpg", ".jpeg", ".gif", ".webp"],
            )
            in_num = gr.Slider(
                label="Page number", minimum=1, maximum=100, value=1, step=1
            )
            in_img = gr.Image(
                label="Select page of Image", type="pil", sources=None
            )
            detection_btn = gr.Button("Run Equation and Figure Detection")

        # Right column: annotated image and the raw detection data.
        with gr.Column():
            result_img = gr.Gallery(
                label="Result image: Detected Equations and Figures",
                show_label=True,
                elem_id="gallery",
                columns=[1],
                rows=[1],
                object_fit="contain",
                height="auto",
            )
            gr.HTML(_GALLERY_CSS)
            result_json = gr.JSON(label="Result JSON (Bounding Box Data)")

    def show_image(file, num=1):
        """Load the preview image for the uploaded file.

        Args:
            file: Value of the file input. The code treats it as a path-like
                string (it is checked with ``str`` methods); ``None`` or an
                empty value means nothing is uploaded.
            num: 1-based page number, used only for PDFs.

        Returns:
            Two updates, for the page slider and the preview image. For a
            PDF the slider is shown with its maximum set to the page count;
            for an image the slider is hidden.
        """
        if not file:
            # Nothing uploaded (or the file was cleared): leave the slider
            # unchanged and clear the preview instead of raising.
            return [gr.update(), gr.update(value=None)]

        # Compare case-insensitively so "SCAN.PDF" is also treated as a PDF.
        if file.lower().endswith(".pdf"):
            count = page_counter(file)
            # Guard against a page number outside the current document,
            # e.g. a slider value left over from a longer PDF.
            page = max(1, min(int(num), count))
            img = get_page_image(file, page, settings.IMAGE_DPI)
            return [
                gr.update(visible=True, maximum=count),
                gr.update(value=img),
            ]

        img = get_uploaded_image(file)
        return [
            gr.update(visible=False),
            gr.update(value=img),
        ]

    # A new upload shows the first page (num defaults to 1).
    in_file.upload(
        fn=show_image,
        inputs=[in_file],
        outputs=[in_num, in_img],
    )
    # Moving the slider re-renders the selected page.
    in_num.change(
        fn=show_image,
        inputs=[in_file, in_num],
        outputs=[in_num, in_img],
    )

    def run_focused_detection(pil_image):
        """Run Equation/Figure detection on the previewed page.

        Args:
            pil_image: The PIL image currently held by the preview component,
                or ``None`` when no page has been loaded.

        Returns:
            Two updates: the gallery showing the annotated image, and the
            JSON view showing the prediction (without ``segmentation_map``).
            Both labels include the number of detected boxes.

        Raises:
            gr.Error: If no image has been loaded yet.
        """
        if pil_image is None:
            raise gr.Error(
                "Please upload a PDF or image before running detection."
            )

        # Best-effort usage counter. Any failure is deliberately ignored, and
        # the timeout keeps an unreachable counter service from stalling the
        # detection request.
        with suppress(Exception):
            requests.get(
                "https://counterapi.com/api/xiaoyao9184.github.com/view/docker-surya",
                timeout=5,
            )

        layout_img, pred = focused_layout_detection(pil_image)

        # Exclude the large segmentation map from the JSON output.
        layout_json = pred.model_dump(exclude=["segmentation_map"])
        num_boxes = len(layout_json.get("bboxes", []))

        return (
            gr.update(
                label=f"Result image: {num_boxes} Equations/Figures detected",
                value=[layout_img],
                rows=[1],
                height=layout_img.height,
            ),
            gr.update(
                label=f"Result JSON: {num_boxes} Equations/Figures detected",
                value=layout_json,
            ),
        )

    detection_btn.click(
        fn=run_focused_detection,
        inputs=[in_img],
        outputs=[result_img, result_json],
    )
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()