Spaces:

Shreyas094
/

Layout-LM-Table-Extractor

Sleeping

App Files Files Community

Shreyas094 committed on Sep 21, 2024

Commit

eed78bd

verified ·

1 Parent(s): adb779a

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -58

app.py CHANGED Viewed

@@ -1,68 +1,64 @@
-import os
-os.system('pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu')
 import gradio as gr
-from transformers import AutoProcessor, AutoModel
-from PIL import Image
-from pdf2image import convert_from_path
-from PyPDF2 import PdfReader
 import os
-import torch
-# Load the LayoutLMv3 model and processor
-processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base")
-model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")
-def process_pdf(pdf, page_number):
-    try:
-        reader = PdfReader(pdf.name)
-        num_pages = len(reader.pages)
-        if page_number > num_pages or page_number < 1:
-            return f"Invalid page number. The document has {num_pages} pages."
-        images = convert_from_path(pdf.name, first_page=page_number, last_page=page_number)
-        return process_image(images[0])
-    except Exception as e:
-        return f"Error processing PDF: {str(e)}"
-def process_image(image):
-    try:
-        image = image.convert("RGB")
-        # Process the image with LayoutLMv3
-        encoded_inputs = processor(image, return_tensors="pt")
-        with torch.no_grad():
-            outputs = model(**encoded_inputs)
-        # Extract relevant information from the model output
-        last_hidden_states = outputs.last_hidden_state
-        # Here you would implement logic to interpret the model output
-        # For now, we'll just return some basic information
-        return f"Image processed successfully. Output shape: {last_hidden_states.shape}"
-    except Exception as e:
-        return f"Error processing image: {str(e)}"
-def extract_table(file, page_number):
-    try:
-        file_ext = os.path.splitext(file.name)[1].lower()
-        if file_ext == ".pdf":
-            return process_pdf(file, page_number)
-        elif file_ext in [".png", ".jpg", ".jpeg"]:
-            image = Image.open(file)
-            return process_image(image)
-        else:
-            return "Unsupported file format. Please upload a PDF or an image (PNG, JPG, JPEG)."
-    except Exception as e:
-        return f"Error: {str(e)}"
-with gr.Blocks() as demo:
-    gr.Markdown("# Upload a PDF or an image of a table")
-    with gr.Row():
-        file_input = gr.File(label="Upload PDF or Image", file_types=["pdf", "png", "jpg", "jpeg"])
-        page_input = gr.Number(label="Page number for PDF (ignore if uploading an image)", value=1, precision=0)
-    output = gr.Textbox(label="Result")
-    file_input.change(fn=extract_table, inputs=[file_input, page_input], outputs=output)
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+from paddleocr import PPStructure, save_structure_res
+import cv2
 import os
+import numpy as np
+import shutil
+import uuid
+# Initialize the PP-Structure table model once to avoid reloading for each request
+table_engine = PPStructure(
+    show_log=False,
+    image_orientation=True,
+    )
+def extract_table(image):
+    # Generate a unique ID for the session to avoid conflicts
+    session_id = str(uuid.uuid4())
+    save_folder = os.path.join('./output', session_id)
+    os.makedirs(save_folder, exist_ok=True)
+    # Convert PIL Image to OpenCV format
+    image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    # Perform table recognition on the image
+    result = table_engine(image_cv)
+    # Save the recognized table data
+    img_name = 'input_image'  # You can customize this as needed
+    save_structure_res(result, save_folder, img_name)
+    # The save_structure_res function saves an Excel file with extension .xlsx
+    excel_file = os.path.join(save_folder, img_name, 'excel', img_name + '.xlsx')
+    if os.path.exists(excel_file):
+        # Return the path to the Excel file for download
+        return excel_file
+    else:
+        # Handle case where no table was detected
+        return "No table detected in the image."
+    # Clean up temporary files (optional)
+    # shutil.rmtree(save_folder)
+# Define the Gradio interface
+iface = gr.Interface(
+    fn=extract_table,
+    inputs=gr.Image(type="pil"),
+    outputs=gr.File(label="Extracted Excel File"),
+    title="Table Data Extractor",
+    description="""
+    **Extract Table Data from Images**
+    Upload an image of a financial statement (e.g., income statement, balance sheet) to extract the table data into an Excel file.
+    **Instructions:**
+    1. Click 'Browse' or drag and drop an image file.
+    2. Wait for the processing to complete.
+    3. Download the extracted Excel file.
+    """,
+    allow_flagging="never",
+    examples=[],
+)
 if __name__ == "__main__":
+    iface.launch()