Shreyas094 committed on
Commit
eed78bd
·
verified ·
1 Parent(s): adb779a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -58
app.py CHANGED
@@ -1,68 +1,64 @@
1
- import os
2
- os.system('pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu')
3
-
4
  import gradio as gr
5
- from transformers import AutoProcessor, AutoModel
6
- from PIL import Image
7
- from pdf2image import convert_from_path
8
- from PyPDF2 import PdfReader
9
  import os
10
- import torch
 
 
11
 
12
- # Load the LayoutLMv3 model and processor
13
- processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base")
14
- model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")
 
 
15
 
16
- def process_pdf(pdf, page_number):
17
- try:
18
- reader = PdfReader(pdf.name)
19
- num_pages = len(reader.pages)
20
- if page_number > num_pages or page_number < 1:
21
- return f"Invalid page number. The document has {num_pages} pages."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- images = convert_from_path(pdf.name, first_page=page_number, last_page=page_number)
24
- return process_image(images[0])
25
- except Exception as e:
26
- return f"Error processing PDF: {str(e)}"
27
 
28
- def process_image(image):
29
- try:
30
- image = image.convert("RGB")
31
-
32
- # Process the image with LayoutLMv3
33
- encoded_inputs = processor(image, return_tensors="pt")
34
- with torch.no_grad():
35
- outputs = model(**encoded_inputs)
36
-
37
- # Extract relevant information from the model output
38
- last_hidden_states = outputs.last_hidden_state
39
-
40
- # Here you would implement logic to interpret the model output
41
- # For now, we'll just return some basic information
42
- return f"Image processed successfully. Output shape: {last_hidden_states.shape}"
43
- except Exception as e:
44
- return f"Error processing image: {str(e)}"
45
 
46
- def extract_table(file, page_number):
47
- try:
48
- file_ext = os.path.splitext(file.name)[1].lower()
49
- if file_ext == ".pdf":
50
- return process_pdf(file, page_number)
51
- elif file_ext in [".png", ".jpg", ".jpeg"]:
52
- image = Image.open(file)
53
- return process_image(image)
54
- else:
55
- return "Unsupported file format. Please upload a PDF or an image (PNG, JPG, JPEG)."
56
- except Exception as e:
57
- return f"Error: {str(e)}"
58
 
59
- with gr.Blocks() as demo:
60
- gr.Markdown("# Upload a PDF or an image of a table")
61
- with gr.Row():
62
- file_input = gr.File(label="Upload PDF or Image", file_types=["pdf", "png", "jpg", "jpeg"])
63
- page_input = gr.Number(label="Page number for PDF (ignore if uploading an image)", value=1, precision=0)
64
- output = gr.Textbox(label="Result")
65
- file_input.change(fn=extract_table, inputs=[file_input, page_input], outputs=output)
 
66
 
67
  if __name__ == "__main__":
68
- demo.launch()
 
 
 
 
1
  import gradio as gr
2
+ from paddleocr import PPStructure, save_structure_res
3
+ import cv2
 
 
4
  import os
5
+ import numpy as np
6
+ import shutil
7
+ import uuid
8
 
9
+ # Initialize the PP-Structure table model once to avoid reloading for each request
10
+ table_engine = PPStructure(
11
+ show_log=False,
12
+ image_orientation=True,
13
+ )
14
 
15
+ def extract_table(image):
16
+ # Generate a unique ID for the session to avoid conflicts
17
+ session_id = str(uuid.uuid4())
18
+ save_folder = os.path.join('./output', session_id)
19
+ os.makedirs(save_folder, exist_ok=True)
20
+
21
+ # Convert PIL Image to OpenCV format
22
+ image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
23
+
24
+ # Perform table recognition on the image
25
+ result = table_engine(image_cv)
26
+
27
+ # Save the recognized table data
28
+ img_name = 'input_image' # You can customize this as needed
29
+ save_structure_res(result, save_folder, img_name)
30
+
31
+ # The save_structure_res function saves an Excel file with extension .xlsx
32
+ excel_file = os.path.join(save_folder, img_name, 'excel', img_name + '.xlsx')
33
+ if os.path.exists(excel_file):
34
+ # Return the path to the Excel file for download
35
+ return excel_file
36
+ else:
37
+ # Handle case where no table was detected
38
+ return "No table detected in the image."
39
 
40
+ # Clean up temporary files (optional)
41
+ # shutil.rmtree(save_folder)
 
 
42
 
43
+ # Define the Gradio interface
44
+ iface = gr.Interface(
45
+ fn=extract_table,
46
+ inputs=gr.Image(type="pil"),
47
+ outputs=gr.File(label="Extracted Excel File"),
48
+ title="Table Data Extractor",
49
+ description="""
50
+ **Extract Table Data from Images**
 
 
 
 
 
 
 
 
 
51
 
52
+ Upload an image of a financial statement (e.g., income statement, balance sheet) to extract the table data into an Excel file.
 
 
 
 
 
 
 
 
 
 
 
53
 
54
+ **Instructions:**
55
+ 1. Click 'Browse' or drag and drop an image file.
56
+ 2. Wait for the processing to complete.
57
+ 3. Download the extracted Excel file.
58
+ """,
59
+ allow_flagging="never",
60
+ examples=[],
61
+ )
62
 
63
  if __name__ == "__main__":
64
+ iface.launch()