pratikshahp committed on
Commit
ce975c6
·
verified ·
1 Parent(s): 253113c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -58
app.py CHANGED
@@ -1,63 +1,44 @@
1
-
2
-
3
- #Invoice Extractor
4
- import os
5
- import streamlit as st
6
  from PIL import Image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- # Load model directly
9
- from transformers import AutoProcessor, AutoModelForSeq2SeqLM
10
-
11
-
12
-
13
- def get_model_response(input_text, image, prompt):
14
- # Load the pre-trained model
15
- processor = AutoProcessor.from_pretrained("google/pix2struct-base")
16
- model = AutoModelForSeq2SeqLM.from_pretrained("google/pix2struct-base")
17
-
18
- # Tokenize inputs
19
- inputs = tokenizer(input_text, image, prompt, return_tensors="pt", max_length=512, truncation=True)
20
-
21
- # Generate response from the model
22
- outputs = model(**inputs)
23
- response = tokenizer.decode(outputs.logits.argmax(dim=-1))
24
-
25
- return response
26
-
27
- #function to provide image
28
- def input_image_setup(uploaded_file):
29
- if uploaded_file is not None:
30
- #read te file into byte
31
- bytes_data = uploaded_file.getvalue()
32
- image_parts=[
33
- {
34
- "mime_type": uploaded_file.type,
35
- "data": bytes_data
36
- }
37
- ]
38
- return image_parts
39
- else:
40
- raise FileNotFoundError("No file uploaded")
41
 
42
- #Streamlit App
43
- st.set_page_config(page_title="Invoice Extractor")
44
- st.header("Gemini Application")
45
- input=st.text_input("Input Prompt: ",key="input")
46
- uploaded_file = st.file_uploader("choose an image...", type=["jpg","jpeg","png"])
47
- image=""
48
  if uploaded_file is not None:
49
  image = Image.open(uploaded_file)
50
- st.image(image, caption="Uploaded Image.", use_column_width=True)
51
- submit = st.button("Tell me about the invoice")
52
- input_prompt="""
53
- You are an expert in understanding invoices. you will receive input images as invoices and
54
- you will have to answer questions based on the input image.
55
- """
56
- #if submit button is clicked,
57
- if submit:
58
- image_data=input_image_setup(uploaded_file)
59
- response=get_model_response(input_prompt,image_data,input)
60
- st.subheader("The response is..")
61
- st.write(response)
62
-
63
-
 
 
 
 
 
 
1
  from PIL import Image
2
+ import streamlit as st
3
+ from transformers import AutoModelForImageToText, AutoTokenizer, AutoModelForCausalLM, AutoTokenizer
4
+ import torch
5
+
6
+ # Load Hugging Face models
+ # These four objects are created once at module top level and are used as
+ # globals by extract_text_from_image() and generate_response() below.
+ # NOTE(review): `AutoModelForImageToText` does not appear to be a class
+ # exported by transformers (the closest names are `AutoModelForVision2Seq`
+ # and `AutoModelForImageTextToText`) - confirm against the installed version.
+ # NOTE(review): "microsoft/layoutlm-base-uncased" is presumably a text+layout
+ # encoder rather than an image-to-text checkpoint - verify it can be used here.
7
+ image_to_text_model = AutoModelForImageToText.from_pretrained("microsoft/layoutlm-base-uncased")
8
+ image_to_text_tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
9
+ # GPT-2 model/tokenizer pair used to produce the final free-text response.
+ text_generation_model = AutoModelForCausalLM.from_pretrained("gpt2")
10
+ text_generation_tokenizer = AutoTokenizer.from_pretrained("gpt2")
11
+
12
+ # Function to extract text from image
13
def extract_text_from_image(image):
    """Run the image-to-text model on ``image`` and return the decoded text.

    Args:
        image: The uploaded invoice image (the app passes the object returned
            by ``PIL.Image.open``).

    Returns:
        The decoded string for the first item of the batch.
    """
    # NOTE(review): the image is handed to a *tokenizer*; presumably an image
    # processor is required instead - confirm against the loaded checkpoint.
    inputs = image_to_text_tokenizer(image, return_tensors="pt")
    with torch.no_grad():  # inference only, no gradients needed
        outputs = image_to_text_model(**inputs)
    # batch_decode() expects integer token ids, not float logits, so pick the
    # highest-scoring vocabulary entry at every position first.
    # Assumes logits are laid out as (batch, sequence, vocab) - TODO confirm.
    token_ids = outputs.logits.argmax(dim=-1)
    return image_to_text_tokenizer.batch_decode(token_ids, skip_special_tokens=True)[0]
18
+
19
+ # Function to generate response
20
def generate_response(prompt, text, max_new_tokens=128):
    """Generate a free-text response from the user prompt and extracted text.

    Args:
        prompt: Instruction typed by the user.
        text: Text extracted from the invoice image.
        max_new_tokens: Upper bound on the number of tokens to generate.
            Defaults to 128; existing two-argument callers are unaffected.

    Returns:
        The decoded first sequence returned by ``generate`` (for a causal LM
        this includes the input text followed by the continuation).
    """
    # Separate the instruction from the extracted text so the last word of the
    # prompt and the first word of the text are not glued together.
    combined = f"{prompt}\n{text}"
    # Leave room for the generated tokens inside the tokenizer's length limit;
    # long invoices are truncated instead of overflowing the model context.
    input_budget = max(1, text_generation_tokenizer.model_max_length - max_new_tokens)
    inputs = text_generation_tokenizer(
        combined,
        return_tensors="pt",
        truncation=True,
        max_length=input_budget,
    )
    with torch.no_grad():  # inference only, no gradients needed
        outputs = text_generation_model.generate(
            **inputs,
            # Bound the continuation explicitly rather than relying on the
            # default total-length limit, which a long prompt can exhaust.
            max_new_tokens=max_new_tokens,
            # GPT-2 defines no pad token; reuse EOS so generate() does not
            # have to guess one.
            pad_token_id=text_generation_tokenizer.eos_token_id,
        )
    return text_generation_tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
25
+
26
# Streamlit App: upload an invoice image, extract its text, then generate a
# response to the user's prompt from that text.
st.set_page_config(page_title="Invoice Extractor")
st.header("Invoice Extractor")

prompt = st.text_input("Prompt: ", "Please provide details about this invoice.")
uploaded_file = st.file_uploader("Upload an invoice image...", type=["jpg", "jpeg", "png"])

# Bind `image` up front so the button handler below can test it even when
# nothing has been uploaded; otherwise the name is undefined on that path and
# the warning branch can never be reached.
image = None
if uploaded_file is not None:
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image.", use_column_width=True)

if st.button("Extract and Generate"):
    if image is not None:
        extracted_text = extract_text_from_image(image)
        response = generate_response(prompt, extracted_text)
        st.subheader("Generated Response:")
        st.write(response)
    else:
        st.warning("Please upload an image.")