pratikshahp committed on
Commit
36d2263
·
verified ·
1 Parent(s): 279f755

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -34
app.py CHANGED
@@ -1,46 +1,36 @@
1
- import os
2
  import streamlit as st
3
  from PIL import Image
4
- from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
5
  import torch
6
 
7
- # Load LayoutLM model and tokenizer
8
- model_name = "microsoft/layoutlm-base-uncased"
9
- layoutlm_model = LayoutLMForTokenClassification.from_pretrained(model_name)
10
- layoutlm_tokenizer = LayoutLMTokenizer.from_pretrained(model_name)
11
-
12
- # Function to extract text from image using LayoutLM
13
- def extract_text_from_image(image):
14
- # Convert image to text using OCR (Optical Character Recognition) before tokenizing
15
- # For simplicity, we'll assume the image is converted to text directly without OCR
16
- # You may need to use OCR libraries like pytesseract for real-world scenarios
17
- text = image_to_text(image)
18
- inputs = layoutlm_tokenizer(text, return_tensors="pt")
19
  with torch.no_grad():
20
- outputs = layoutlm_model(**inputs)
21
- predicted_tokens = layoutlm_tokenizer.convert_ids_to_tokens(outputs.logits.argmax(2).squeeze().tolist())
22
- # Filter special tokens and concatenate text tokens
23
- extracted_text = " ".join([token for token in predicted_tokens if token not in ['[CLS]', '[SEP]', '[PAD]']])
24
- return extracted_text
25
 
26
- # Function to convert image to text (replace with OCR library if needed)
27
- def image_to_text(image):
28
- # For simplicity, return a placeholder text
29
- return "Invoice text extracted from the image."
30
 
31
- # Function to extract information from the invoice based on user input
32
- def extract_information(extracted_text, prompt):
33
- # Implement your logic to extract relevant information from the extracted text
34
- # For simplicity, let's return a placeholder response
35
- response = f"Extracted information based on prompt: {prompt}"
36
- return response
37
 
38
  # Streamlit App
39
  st.set_page_config(page_title="Invoice Extractor")
40
  st.header("Invoice Extractor")
41
 
42
- prompt = st.text_input("Input Prompt: ", "Please provide details about this invoice.")
43
  uploaded_file = st.file_uploader("Upload an invoice image...", type=["jpg", "jpeg", "png"])
 
44
  image = None
45
 
46
  if uploaded_file is not None:
@@ -53,8 +43,6 @@ if submit:
53
  if image is None:
54
  st.warning("Please upload an image.")
55
  else:
56
- extracted_text = extract_text_from_image(image)
57
- # Extract specific information from the invoice based on user input prompt
58
- extracted_info = extract_information(extracted_text, prompt)
59
  st.subheader("Extracted Information:")
60
- st.write(extracted_info)
 
 
1
  import streamlit as st
2
  from PIL import Image
3
+ from transformers import LayoutLMv2ForTokenClassification, LayoutLMv2Processor
4
  import torch
5
 
6
+ # Load LayoutLMv2 model and processor
7
+ model_name = "microsoft/layoutlmv2-base-uncased"
8
+ layoutlm_model = LayoutLMv2ForTokenClassification.from_pretrained(model_name)
9
+ layoutlm_processor = LayoutLMv2Processor.from_pretrained(model_name)
10
+
11
def extract_text_from_image(image, question):
    """Return the text the LayoutLMv2 processor reads from an invoice image.

    The image is passed through ``layoutlm_processor`` and the resulting
    token ids are decoded back into a plain string.

    Args:
        image: The invoice image, in whatever form ``layoutlm_processor``
            accepts (presumably a PIL image -- confirm against the caller).
        question: The user's question about the invoice. Kept so existing
            callers keep working; it is not used by this function yet.

    Returns:
        The decoded text as a single string with special tokens removed.
    """
    # NOTE(review): with its default settings the processor is expected to
    # run OCR on the image and tokenize the recognised words -- confirm the
    # OCR backend is installed in the deployment environment.
    encoding = layoutlm_processor(image, return_tensors="pt")

    # Decode the processor's own input ids. Taking argmax over the
    # token-classification logits yields label indices, not vocabulary ids,
    # so feeding those to the tokenizer produces tokens unrelated to the
    # document. No model forward pass is needed to recover the text.
    token_ids = encoding["input_ids"][0]
    return layoutlm_processor.tokenizer.decode(
        token_ids, skip_special_tokens=True
    )
 
 
27
 
28
  # Streamlit App
29
  st.set_page_config(page_title="Invoice Extractor")
30
  st.header("Invoice Extractor")
31
 
 
32
  uploaded_file = st.file_uploader("Upload an invoice image...", type=["jpg", "jpeg", "png"])
33
+ question = st.text_input("Enter your question about the invoice:")
34
  image = None
35
 
36
  if uploaded_file is not None:
 
43
  if image is None:
44
  st.warning("Please upload an image.")
45
  else:
46
+ extracted_text = extract_text_from_image(image, question)
 
 
47
  st.subheader("Extracted Information:")
48
+ st.write(extracted_text)