Update app.py
Browse files
app.py
CHANGED
|
@@ -4,54 +4,59 @@ from PIL import Image
|
|
| 4 |
from pdf2image import convert_from_path
|
| 5 |
from PyPDF2 import PdfReader
|
| 6 |
import os
|
|
|
|
| 7 |
|
| 8 |
# Load the LayoutLMv3 model and processor
|
| 9 |
processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base")
|
| 10 |
model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")
|
| 11 |
|
| 12 |
-
# Helper function to process PDF page
|
| 13 |
def process_pdf(pdf, page_number):
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
|
|
|
| 22 |
|
| 23 |
-
# Helper function to process image (for both direct image input and PDF page conversion)
|
| 24 |
def process_image(image):
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
-
# Gradio interface function
|
| 34 |
def extract_table(file, page_number):
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
# Define the Gradio interface
|
| 45 |
with gr.Blocks() as demo:
|
| 46 |
gr.Markdown("# Upload a PDF or an image of a table")
|
| 47 |
-
|
| 48 |
with gr.Row():
|
| 49 |
-
|
| 50 |
-
file_input = gr.File(label="Upload PDF or Image", file_types=["pdf", "image"])
|
| 51 |
page_input = gr.Number(label="Page number for PDF (ignore if uploading an image)", value=1, precision=0)
|
| 52 |
-
|
| 53 |
output = gr.Textbox(label="Result")
|
| 54 |
-
|
| 55 |
file_input.change(fn=extract_table, inputs=[file_input, page_input], outputs=output)
|
| 56 |
|
| 57 |
-
|
|
|
|
|
|
| 4 |
from pdf2image import convert_from_path
|
| 5 |
from PyPDF2 import PdfReader
|
| 6 |
import os
|
| 7 |
+
import torch
|
| 8 |
|
| 9 |
# Load the LayoutLMv3 processor and model at module level so that every
# request handled by this module reuses the same instances.
_CHECKPOINT = "microsoft/layoutlmv3-base"
processor = AutoProcessor.from_pretrained(_CHECKPOINT)
model = AutoModel.from_pretrained(_CHECKPOINT)
|
| 12 |
|
|
|
|
| 13 |
def process_pdf(pdf, page_number):
    """Render one page of an uploaded PDF and run it through ``process_image``.

    Args:
        pdf: Uploaded file object; only its ``name`` attribute (used as the
            path passed to ``PdfReader`` and ``convert_from_path``) is read.
        page_number: 1-based page to analyse. Anything ``int()`` accepts is
            coerced to an integer (fractions are truncated); ``None`` or
            non-numeric input is rejected with a message.

    Returns:
        str: The text returned by ``process_image`` for the rendered page, or
        a human-readable message when the page number is invalid or the PDF
        cannot be processed. This function reports problems through its
        return value instead of raising.
    """
    # The page number may arrive as None (empty input) or as a float; validate
    # it before touching the file so the user gets a clear message instead of
    # a raw comparison TypeError.
    try:
        page_number = int(page_number)
    except (TypeError, ValueError, OverflowError):
        return "Invalid page number. Please enter a whole number."

    try:
        reader = PdfReader(pdf.name)
        num_pages = len(reader.pages)
        if not 1 <= page_number <= num_pages:
            return f"Invalid page number. The document has {num_pages} pages."

        # Render only the requested page rather than the whole document.
        images = convert_from_path(
            pdf.name, first_page=page_number, last_page=page_number
        )
        if not images:
            return f"Error processing PDF: page {page_number} could not be rendered."
        return process_image(images[0])
    except Exception as e:
        # Top-level UI boundary: surface the failure as text for the result box.
        return f"Error processing PDF: {str(e)}"
|
| 24 |
|
|
|
|
| 25 |
def process_image(image):
    """Run a single image through the LayoutLMv3 processor and model.

    Args:
        image: Object exposing ``convert("RGB")`` (a PIL image in this file's
            callers); it is converted to RGB before being encoded.

    Returns:
        str: A summary containing the shape of the model's last hidden state,
        or an ``"Error processing image: ..."`` message if any step fails.
    """
    try:
        rgb_image = image.convert("RGB")
        model_inputs = processor(rgb_image, return_tensors="pt")

        # Inference only, so gradient tracking is switched off.
        with torch.no_grad():
            model_outputs = model(**model_inputs)

        # Placeholder for table extraction logic.
        # Table detection and extraction would be implemented here; for now
        # only the shape of the last hidden states is reported.
        hidden_shape = model_outputs.last_hidden_state.shape
        return f"Image processed. Output shape: {hidden_shape}"
    except Exception as exc:
        return f"Error processing image: {exc!s}"
|
| 39 |
|
|
|
|
| 40 |
def extract_table(file, page_number):
    """Gradio callback: route an uploaded file to the PDF or image pipeline.

    Args:
        file: Upload handed over by the file component; only its ``name``
            attribute (the path of the file) is read. May be ``None`` when no
            file is present.
        page_number: Page to analyse when ``file`` is a PDF; ignored for
            images.

    Returns:
        str: The text produced by ``process_pdf`` or ``process_image``, or a
        message describing why the upload could not be handled. Problems are
        reported through the return value instead of being raised.
    """
    # Without a file there is nothing to dispatch on; answer with a clear
    # message rather than an AttributeError on ``None.name``.
    if file is None:
        return "No file uploaded. Please upload a PDF or an image."

    try:
        file_ext = os.path.splitext(file.name)[1].lower()
        if file_ext == ".pdf":
            return process_pdf(file, page_number)
        if file_ext in (".png", ".jpg", ".jpeg"):
            # Open by path (as the PDF branch does) and close the handle once
            # process_image has finished with the pixel data.
            with Image.open(file.name) as image:
                return process_image(image)
        return "Unsupported file format. Please upload a PDF or an image (PNG, JPG, JPEG)."
    except Exception as e:
        # Top-level UI boundary: surface the failure as text for the result box.
        return f"Error: {str(e)}"
|
| 52 |
|
|
|
|
| 53 |
# Build the Gradio UI: a file upload and a page selector feed extract_table,
# whose text result is shown in a textbox.
with gr.Blocks() as demo:
    gr.Markdown("# Upload a PDF or an image of a table")
    with gr.Row():
        # File extensions are given with a leading dot; bare words such as
        # "pdf" are treated by Gradio as file categories, not extensions.
        file_input = gr.File(
            label="Upload PDF or Image",
            file_types=[".pdf", ".png", ".jpg", ".jpeg"],
        )
        page_input = gr.Number(
            label="Page number for PDF (ignore if uploading an image)",
            value=1,
            precision=0,
        )
    output = gr.Textbox(label="Result")
    # Run the extraction whenever the uploaded file changes.
    file_input.change(fn=extract_table, inputs=[file_input, page_input], outputs=output)

if __name__ == "__main__":
    demo.launch()
|