Spaces:

pratikshahp
/

Invoice-Extractor

Sleeping

App Files Files Community

pratikshahp committed on Jun 11, 2024

Commit

36d2263

verified ·

1 Parent(s): 279f755

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -34

app.py CHANGED Viewed

@@ -1,46 +1,36 @@
-import os
 import streamlit as st
 from PIL import Image
-from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
 import torch
-# Load LayoutLM model and tokenizer
-model_name = "microsoft/layoutlm-base-uncased"
-layoutlm_model = LayoutLMForTokenClassification.from_pretrained(model_name)
-layoutlm_tokenizer = LayoutLMTokenizer.from_pretrained(model_name)
-# Function to extract text from image using LayoutLM
-def extract_text_from_image(image):
-    # Convert image to text using OCR (Optical Character Recognition) before tokenizing
-    # For simplicity, we'll assume the image is converted to text directly without OCR
-    # You may need to use OCR libraries like pytesseract for real-world scenarios
-    text = image_to_text(image)
-    inputs = layoutlm_tokenizer(text, return_tensors="pt")
     with torch.no_grad():
-        outputs = layoutlm_model(**inputs)
-    predicted_tokens = layoutlm_tokenizer.convert_ids_to_tokens(outputs.logits.argmax(2).squeeze().tolist())
-    # Filter special tokens and concatenate text tokens
-    extracted_text = " ".join([token for token in predicted_tokens if token not in ['[CLS]', '[SEP]', '[PAD]']])
-    return extracted_text
-# Function to convert image to text (replace with OCR library if needed)
-def image_to_text(image):
-    # For simplicity, return a placeholder text
-    return "Invoice text extracted from the image."
-# Function to extract information from the invoice based on user input
-def extract_information(extracted_text, prompt):
-    # Implement your logic to extract relevant information from the extracted text
-    # For simplicity, let's return a placeholder response
-    response = f"Extracted information based on prompt: {prompt}"
-    return response
 # Streamlit App
 st.set_page_config(page_title="Invoice Extractor")
 st.header("Invoice Extractor")
-prompt = st.text_input("Input Prompt: ", "Please provide details about this invoice.")
 uploaded_file = st.file_uploader("Upload an invoice image...", type=["jpg", "jpeg", "png"])
 image = None
 if uploaded_file is not None:
@@ -53,8 +43,6 @@ if submit:
     if image is None:
         st.warning("Please upload an image.")
     else:
-        extracted_text = extract_text_from_image(image)
-        # Extract specific information from the invoice based on user input prompt
-        extracted_info = extract_information(extracted_text, prompt)
         st.subheader("Extracted Information:")
-        st.write(extracted_info)

 import streamlit as st
 from PIL import Image
+from transformers import LayoutLMv2ForTokenClassification, LayoutLMv2Processor
 import torch
+# Load LayoutLMv2 model and processor
+model_name = "microsoft/layoutlmv2-base-uncased"
+layoutlm_model = LayoutLMv2ForTokenClassification.from_pretrained(model_name)
+layoutlm_processor = LayoutLMv2Processor.from_pretrained(model_name)
+def extract_text_from_image(image, question):
+    # Run the module-level LayoutLMv2 processor and model on `image` and
+    # return a space-joined string of the decoded, non-special tokens.
+    #
+    # Parameters:
+    #   image:    passed straight to `layoutlm_processor`; presumably a PIL
+    #             image opened from the uploaded file -- TODO confirm at caller.
+    #   question: accepted but never referenced in this body, so the user's
+    #             question currently has no effect on the result.
+    #
+    # Returns:
+    #   A single string built by joining the remaining tokens with spaces.
+    #
+    # Preprocess the image using the processor
+    encoding = layoutlm_processor(image, return_tensors="pt")
+    # Get model predictions
     with torch.no_grad():
+        outputs = layoutlm_model(**encoding)
+    # Take the argmax over the last logits dimension and flatten it to a list.
+    # NOTE(review): `layoutlm_model` is a token-classification head, so these
+    # indices are presumably class-label ids rather than vocabulary ids --
+    # confirm before trusting the decoded output below.
+    predicted_tokens = outputs.logits.argmax(-1).squeeze().tolist()
+    # The indices are looked up in the tokenizer vocabulary as if they were
+    # token ids (see the review note above).
+    tokens = layoutlm_processor.tokenizer.convert_ids_to_tokens(predicted_tokens)
+    # Filter out special tokens and join the remaining tokens into a string
+    extracted_text = " ".join([token for token in tokens if token not in layoutlm_processor.tokenizer.all_special_tokens])
+    return extracted_text
 # Streamlit App
 st.set_page_config(page_title="Invoice Extractor")
 st.header("Invoice Extractor")
 uploaded_file = st.file_uploader("Upload an invoice image...", type=["jpg", "jpeg", "png"])
+question = st.text_input("Enter your question about the invoice:")
 image = None
 if uploaded_file is not None:
     if image is None:
         st.warning("Please upload an image.")
     else:
+        extracted_text = extract_text_from_image(image, question)
         st.subheader("Extracted Information:")
+        st.write(extracted_text)