|
|
import gradio as gr |
|
|
from paddleocr import PPStructure, save_structure_res |
|
|
import cv2 |
|
|
import os |
|
|
import numpy as np |
|
|
import shutil |
|
|
import uuid |
|
|
from PIL import Image |
|
|
|
|
|
|
|
|
# Keyword options handed to the PP-Structure constructor.
_ENGINE_OPTIONS = {
    "show_log": False,
    "image_orientation": True,
}

# Single module-level engine instance, created at import time and called by
# extract_table for each submitted image.
table_engine = PPStructure(**_ENGINE_OPTIONS)
|
|
|
|
|
def _find_excel_files(folder):
    """Return every ``.xlsx`` path found anywhere under *folder*, sorted."""
    found = []
    for root, _dirs, names in os.walk(folder):
        for name in names:
            if name.lower().endswith('.xlsx'):
                found.append(os.path.join(root, name))
    return sorted(found)


def extract_table(image):
    """Run table recognition on an image and return the resulting Excel path.

    The image is converted from RGB to the BGR channel order used by OpenCV,
    passed through the module-level ``table_engine``, and the results are
    written with ``save_structure_res`` into a fresh per-request folder under
    ``./output``.

    Args:
        image: The picture delivered by the ``gr.Image(type="pil")`` input,
            or ``None`` when the form was submitted without an image.

    Returns:
        Path of the first ``.xlsx`` workbook (in sorted path order) that was
        written for this request.

    Raises:
        gr.Error: If no image was supplied, or if no workbook was produced
            for the image.
    """
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    # A unique folder per request keeps concurrent users from overwriting
    # each other's results.
    session_id = str(uuid.uuid4())
    save_folder = os.path.join('./output', session_id)
    os.makedirs(save_folder, exist_ok=True)

    image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    result = table_engine(image_cv)

    img_name = 'input_image'
    save_structure_res(result, save_folder, img_name)

    # NOTE(review): per the PaddleOCR docs, save_structure_res names each
    # workbook after the table's bounding box, not after img_name, so search
    # the output folder instead of guessing a fixed file name.
    excel_files = _find_excel_files(save_folder)
    if not excel_files:
        # Nothing worth keeping was produced; drop the request folder so
        # empty sessions do not pile up on disk.
        shutil.rmtree(save_folder, ignore_errors=True)
        # The output component is a gr.File, so a plain message string would
        # be treated as a file path; raise a user-visible error instead.
        raise gr.Error("No table detected in the image.")

    return excel_files[0]
|
|
|
|
|
|
|
|
# Text passed to the interface as its description.
_DESCRIPTION = """
**Extract Table Data from Images**

Upload an image of a financial statement (e.g., income statement, balance sheet) to extract the table data into an Excel file.

**Instructions:**
1. Click 'Browse' or drag and drop an image file.
2. Wait for the processing to complete.
3. Download the extracted Excel file.
"""

# Web front end: takes one PIL image, calls extract_table, and offers the
# result through a file component.
iface = gr.Interface(
    fn=extract_table,
    inputs=gr.Image(type="pil"),
    outputs=gr.File(label="Extracted Excel File"),
    title="Table Data Extractor",
    description=_DESCRIPTION,
    allow_flagging="never",
    examples=[],
)
|
|
|
|
|
def main():
    """Launch the Gradio interface defined in this module."""
    iface.launch()


if __name__ == "__main__":
    main()