Shreyas094 committed on
Commit
4fb0a53
·
verified ·
1 Parent(s): 06861fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -5,6 +5,8 @@ from pdf2image import convert_from_path
5
  from PyPDF2 import PdfReader
6
  import os
7
  import torch
 
 
8
 
9
  # Load the LayoutLMv3 model and processor
10
  processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base")
@@ -25,15 +27,20 @@ def process_pdf(pdf, page_number):
25
  def process_image(image):
26
  try:
27
  image = image.convert("RGB")
 
 
 
 
 
 
 
28
  encoded_inputs = processor(image, return_tensors="pt")
29
  with torch.no_grad():
30
  outputs = model(**encoded_inputs)
31
 
32
- # Placeholder for table extraction logic
33
- # You would implement your table detection and extraction here
34
- # For now, we'll just return the shape of the last hidden states
35
  last_hidden_states = outputs.last_hidden_state
36
- return f"Image processed. Output shape: {last_hidden_states.shape}"
37
  except Exception as e:
38
  return f"Error processing image: {str(e)}"
39
 
 
5
  from PyPDF2 import PdfReader
6
  import os
7
  import torch
8
+ import pytesseract
9
+ from pytesseract import TesseractError
10
 
11
  # Load the LayoutLMv3 model and processor
12
  processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base")
 
27
  def process_image(image):
28
  try:
29
  image = image.convert("RGB")
30
+
31
+ # Use Tesseract to extract text from the image
32
+ try:
33
+ text = pytesseract.image_to_string(image)
34
+ except TesseractError as e:
35
+ return f"Tesseract Error: {str(e)}. Make sure Tesseract is installed and in your PATH."
36
+
37
  encoded_inputs = processor(image, return_tensors="pt")
38
  with torch.no_grad():
39
  outputs = model(**encoded_inputs)
40
 
41
+ # For now, we'll just return the extracted text and the shape of the last hidden states
 
 
42
  last_hidden_states = outputs.last_hidden_state
43
+ return f"Extracted Text:\n{text}\n\nImage processed. Output shape: {last_hidden_states.shape}"
44
  except Exception as e:
45
  return f"Error processing image: {str(e)}"
46