Spaces:

Shreyas094
/

Layout-LM-Table-Extractor

Sleeping

Shreyas094 committed on Sep 21, 2024

Commit

c27d398

verified ·

1 Parent(s): b0af973

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,8 +5,6 @@ from pdf2image import convert_from_path
 from PyPDF2 import PdfReader
 import os
 import torch
-import pytesseract
-from pytesseract import TesseractError
 # Load the LayoutLMv3 model and processor
 processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base")
@@ -28,19 +26,17 @@ def process_image(image):
     try:
         image = image.convert("RGB")
-        # Use Tesseract to extract text from the image
-        try:
-            text = pytesseract.image_to_string(image)
-        except TesseractError as e:
-            return f"Tesseract Error: {str(e)}. Make sure Tesseract is installed and in your PATH."
         encoded_inputs = processor(image, return_tensors="pt")
         with torch.no_grad():
             outputs = model(**encoded_inputs)
-        # For now, we'll just return the extracted text and the shape of the last hidden states
         last_hidden_states = outputs.last_hidden_state
-        return f"Extracted Text:\n{text}\n\nImage processed. Output shape: {last_hidden_states.shape}"
     except Exception as e:
         return f"Error processing image: {str(e)}"

 from PyPDF2 import PdfReader
 import os
 import torch
 # Load the LayoutLMv3 model and processor
 processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base")
     try:
         image = image.convert("RGB")
+        # Process the image with LayoutLMv3
         encoded_inputs = processor(image, return_tensors="pt")
         with torch.no_grad():
             outputs = model(**encoded_inputs)
+        # Extract relevant information from the model output
         last_hidden_states = outputs.last_hidden_state
+        # Here you would implement logic to interpret the model output
+        # For now, we'll just return some basic information
+        return f"Image processed successfully. Output shape: {last_hidden_states.shape}"
     except Exception as e:
         return f"Error processing image: {str(e)}"