pratikshahp committed on
Commit
c1dab52
·
verified ·
1 Parent(s): e3dd843

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -23
app.py CHANGED
@@ -1,29 +1,35 @@
1
  import streamlit as st
2
  from PIL import Image
3
- from transformers import LayoutLMv3ForTokenClassification, LayoutLMv3Processor
4
  import torch
5
-
6
- # Load LayoutLMv3 model and processor
7
- model_name = "microsoft/layoutlmv3-base"
8
- layoutlm_model = LayoutLMv3ForTokenClassification.from_pretrained(model_name)
9
- layoutlm_processor = LayoutLMv3Processor.from_pretrained(model_name)
10
-
11
- def extract_text_from_image(image, question):
12
- # Preprocess the image using the processor
13
- encoding = layoutlm_processor(images=image, return_tensors="pt")
14
-
15
- # Get model predictions
 
 
 
 
 
 
 
 
 
 
 
16
  with torch.no_grad():
17
- outputs = layoutlm_model(**encoding)
18
-
19
- # Get the predicted tokens and labels
20
- predicted_tokens = outputs.logits.argmax(-1).squeeze().tolist()
21
- tokens = layoutlm_processor.tokenizer.convert_ids_to_tokens(predicted_tokens)
22
-
23
- # Filter out special tokens and join the remaining tokens into a string
24
- extracted_text = " ".join([token for token in tokens if token not in layoutlm_processor.tokenizer.all_special_tokens])
25
 
26
- return extracted_text
 
 
27
 
28
  # Streamlit App
29
  st.set_page_config(page_title="Invoice Extractor")
@@ -43,6 +49,7 @@ if submit:
43
  if image is None:
44
  st.warning("Please upload an image.")
45
  else:
46
- extracted_text = extract_text_from_image(image, question)
 
47
  st.subheader("Extracted Information:")
48
- st.write(extracted_text)
 
1
  import streamlit as st
2
  from PIL import Image
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
  import torch
5
+ import easyocr
6
+
7
# Load the question-answering model, tokenizer and OCR reader
model_name = "t5-base"


@st.cache_resource(show_spinner=False)
def _load_resources(name):
    """Build the seq2seq model, its tokenizer and the EasyOCR reader once.

    Streamlit re-executes this script on every user interaction; wrapping
    the loading in ``st.cache_resource`` lets reruns reuse the same objects
    instead of constructing them again.

    Args:
        name: Checkpoint name passed to both ``from_pretrained`` calls.

    Returns:
        A ``(model, tokenizer, ocr_reader)`` tuple.
    """
    # NOTE(review): the spinner is disabled because st.set_page_config() is
    # called further down the script and is expected to be the first
    # Streamlit command that renders anything — confirm on the deployed
    # Streamlit version.
    model = AutoModelForSeq2SeqLM.from_pretrained(name)
    tokenizer = AutoTokenizer.from_pretrained(name)
    ocr_reader = easyocr.Reader(['en'])
    return model, tokenizer, ocr_reader


# Same module-level names as before, so the functions below keep working.
qa_model, qa_tokenizer, reader = _load_resources(model_name)
12
+
13
# Function to extract text from image using easyocr
def extract_text_from_image(image):
    """Run OCR on ``image`` and return the recognised text as one string.

    Args:
        image: A PIL image, or any input ``reader.readtext`` accepts
            directly (file path / URL string, raw bytes, NumPy array).
            PIL images are converted to a NumPy array first, because
            EasyOCR's documented input types do not include PIL objects.

    Returns:
        The text fragments returned by EasyOCR joined with single spaces
        (an empty string when the result list is empty).
    """
    if isinstance(image, Image.Image):
        import numpy as np  # local import: numpy is not imported at file level

        # Normalise to 3-channel RGB so palette, RGBA and greyscale uploads
        # all become an (H, W, 3) array, then reverse the channel axis.
        # NOTE(review): the flip assumes EasyOCR treats 3-channel arrays as
        # BGR (OpenCV convention) — confirm against the installed version.
        rgb = np.asarray(image.convert("RGB"))
        image = np.ascontiguousarray(rgb[:, :, ::-1])

    # detail=0 asks EasyOCR for the plain text strings only.
    ocr_result = reader.readtext(image, detail=0)
    return " ".join(ocr_result)
19
+
20
# Function to get response from the language model
def get_response_from_llm(extracted_text, question):
    """Answer ``question`` from ``extracted_text`` with the seq2seq model.

    The prompt is tokenised (truncated to at most 512 tokens), passed to
    ``qa_model.generate`` and the first generated sequence is decoded with
    special tokens skipped.

    Args:
        extracted_text: Context text placed after ``context:`` in the prompt.
        question: Question placed after ``question:`` in the prompt.

    Returns:
        The decoded text of the first generated sequence.
    """
    prompt = f"question: {question} context: {extracted_text}"
    token_ids = qa_tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        generated = qa_model.generate(
            token_ids,
            max_length=150,
            num_return_sequences=1,
        )

    first_sequence = generated[0]
    return qa_tokenizer.decode(first_sequence, skip_special_tokens=True)
33
 
34
  # Streamlit App
35
  st.set_page_config(page_title="Invoice Extractor")
 
49
  if image is None:
50
  st.warning("Please upload an image.")
51
  else:
52
+ extracted_text = extract_text_from_image(image)
53
+ response = get_response_from_llm(extracted_text, question)
54
  st.subheader("Extracted Information:")
55
+ st.write(response)