Shreyas094 committed on
Commit
b1d6d03
·
verified ·
1 Parent(s): ff43ec3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -32
app.py CHANGED
@@ -4,54 +4,59 @@ from PIL import Image
4
  from pdf2image import convert_from_path
5
  from PyPDF2 import PdfReader
6
  import os
 
7
 
8
  # Load the LayoutLMv3 model and processor
9
  processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base")
10
  model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")
11
 
12
- # Helper function to process PDF page
13
  def process_pdf(pdf, page_number):
14
- reader = PdfReader(pdf.name)
15
- num_pages = len(reader.pages)
16
- if page_number > num_pages or page_number < 1:
17
- return f"Invalid page number. The document has {num_pages} pages."
 
18
 
19
- # Convert PDF page to image
20
- images = convert_from_path(pdf.name, first_page=page_number, last_page=page_number)
21
- return process_image(images[0])
 
22
 
23
- # Helper function to process image (for both direct image input and PDF page conversion)
24
  def process_image(image):
25
- image = image.convert("RGB")
26
- # Tokenize and process the image with LayoutLMv3
27
- encoded_inputs = processor(image, return_tensors="pt")
28
- outputs = model(**encoded_inputs)
29
-
30
- # Placeholder for further processing (e.g., extracting tokens, detecting tables, etc.)
31
- return "Image processed with LayoutLMv3."
 
 
 
 
 
 
32
 
33
- # Gradio interface function
34
  def extract_table(file, page_number):
35
- file_ext = os.path.splitext(file.name)[1].lower()
36
- if file_ext == ".pdf":
37
- return process_pdf(file, page_number)
38
- elif file_ext in [".png", ".jpg", ".jpeg"]:
39
- image = Image.open(file)
40
- return process_image(image)
41
- else:
42
- return "Unsupported file format. Please upload a PDF or an image."
 
 
 
43
 
44
- # Define the Gradio interface
45
  with gr.Blocks() as demo:
46
  gr.Markdown("# Upload a PDF or an image of a table")
47
-
48
  with gr.Row():
49
- # Specify file types explicitly: PDFs and images (jpg, png)
50
- file_input = gr.File(label="Upload PDF or Image", file_types=["pdf", "image"])
51
  page_input = gr.Number(label="Page number for PDF (ignore if uploading an image)", value=1, precision=0)
52
-
53
  output = gr.Textbox(label="Result")
54
-
55
  file_input.change(fn=extract_table, inputs=[file_input, page_input], outputs=output)
56
 
57
- demo.launch()
 
 
4
  from pdf2image import convert_from_path
5
  from PyPDF2 import PdfReader
6
  import os
7
+ import torch
8
 
9
  # Load the LayoutLMv3 model and processor
10
  processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base")
11
  model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")
12
 
 
13
  def process_pdf(pdf, page_number):
14
+ try:
15
+ reader = PdfReader(pdf.name)
16
+ num_pages = len(reader.pages)
17
+ if page_number > num_pages or page_number < 1:
18
+ return f"Invalid page number. The document has {num_pages} pages."
19
 
20
+ images = convert_from_path(pdf.name, first_page=page_number, last_page=page_number)
21
+ return process_image(images[0])
22
+ except Exception as e:
23
+ return f"Error processing PDF: {str(e)}"
24
 
 
25
  def process_image(image):
26
+ try:
27
+ image = image.convert("RGB")
28
+ encoded_inputs = processor(image, return_tensors="pt")
29
+ with torch.no_grad():
30
+ outputs = model(**encoded_inputs)
31
+
32
+ # Placeholder for table extraction logic
33
+ # You would implement your table detection and extraction here
34
+ # For now, we'll just return the shape of the last hidden states
35
+ last_hidden_states = outputs.last_hidden_state
36
+ return f"Image processed. Output shape: {last_hidden_states.shape}"
37
+ except Exception as e:
38
+ return f"Error processing image: {str(e)}"
39
 
 
40
  def extract_table(file, page_number):
41
+ try:
42
+ file_ext = os.path.splitext(file.name)[1].lower()
43
+ if file_ext == ".pdf":
44
+ return process_pdf(file, page_number)
45
+ elif file_ext in [".png", ".jpg", ".jpeg"]:
46
+ image = Image.open(file)
47
+ return process_image(image)
48
+ else:
49
+ return "Unsupported file format. Please upload a PDF or an image (PNG, JPG, JPEG)."
50
+ except Exception as e:
51
+ return f"Error: {str(e)}"
52
 
 
53
  with gr.Blocks() as demo:
54
  gr.Markdown("# Upload a PDF or an image of a table")
 
55
  with gr.Row():
56
+ file_input = gr.File(label="Upload PDF or Image", file_types=["pdf", "png", "jpg", "jpeg"])
 
57
  page_input = gr.Number(label="Page number for PDF (ignore if uploading an image)", value=1, precision=0)
 
58
  output = gr.Textbox(label="Result")
 
59
  file_input.change(fn=extract_table, inputs=[file_input, page_input], outputs=output)
60
 
61
+ if __name__ == "__main__":
62
+ demo.launch()