# Shreyas094's picture
# Update app.py
# 1563c2d verified
import gradio as gr
from paddleocr import PPStructure, save_structure_res
import cv2
import os
import numpy as np
import shutil
import uuid
from PIL import Image
# Build the PP-Structure engine a single time at import so that every request
# reuses the same instance instead of reloading it per call.
# NOTE(review): show_log / image_orientation are passed straight through to
# PPStructure; see the PaddleOCR docs for their exact effect.
_ENGINE_OPTIONS = {"show_log": False, "image_orientation": True}
table_engine = PPStructure(**_ENGINE_OPTIONS)
def extract_table(image):
    """Recognise a table in an uploaded image and return it as an Excel file.

    Args:
        image: The uploaded picture. The Gradio input is declared with
            ``type="pil"``, so this is a PIL image, or ``None`` when the
            user submitted without uploading anything.

    Returns:
        Path to an ``.xlsx`` file written under ``./output/<session id>/``.
        If several workbooks were produced, the first one in sorted path
        order is returned.

    Raises:
        gr.Error: If no image was supplied, or if no Excel file was produced
            for the image (no table detected). Gradio shows the message to
            the user; returning a plain string would instead be treated as
            a file path by the ``gr.File`` output.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    # Generate a unique ID for the session to avoid conflicts between requests
    session_id = str(uuid.uuid4())
    save_folder = os.path.join('./output', session_id)
    os.makedirs(save_folder, exist_ok=True)

    # Convert PIL Image (RGB) to the BGR channel order OpenCV works with
    image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

    # Perform table recognition on the image
    result = table_engine(image_cv)

    # Save the recognized table data
    img_name = 'input_image'  # You can customize this as needed
    save_structure_res(result, save_folder, img_name)

    # Do not assume a fixed workbook name: save_structure_res decides the
    # file names itself (NOTE(review): in current PaddleOCR they are derived
    # from each table's bounding box - confirm for the pinned version).
    # Searching the session folder works regardless of the exact layout.
    excel_files = sorted(
        os.path.join(root, file_name)
        for root, _dirs, file_names in os.walk(save_folder)
        for file_name in file_names
        if file_name.lower().endswith('.xlsx')
    )

    if not excel_files:
        # Nothing worth keeping was produced; don't let empty session
        # folders pile up under ./output.
        shutil.rmtree(save_folder, ignore_errors=True)
        raise gr.Error("No table detected in the image.")

    # Return the path to the Excel file for download
    return excel_files[0]
# Markdown blurb shown under the title of the web page.
_DESCRIPTION = """
**Extract Table Data from Images**
Upload an image of a financial statement (e.g., income statement, balance sheet) to extract the table data into an Excel file.
**Instructions:**
1. Click 'Browse' or drag and drop an image file.
2. Wait for the processing to complete.
3. Download the extracted Excel file.
"""

# One image upload in, one downloadable file out.
_image_input = gr.Image(type="pil")
_excel_output = gr.File(label="Extracted Excel File")

# Wire the extraction function to the UI components.
iface = gr.Interface(
    fn=extract_table,
    inputs=_image_input,
    outputs=_excel_output,
    title="Table Data Extractor",
    description=_DESCRIPTION,
    allow_flagging="never",
    examples=[],
)

# Start the Gradio server only when run as a script, not on import.
if __name__ == "__main__":
    iface.launch()